summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim Meyering <jim@meyering.net>1992-11-08 02:50:43 +0000
committerJim Meyering <jim@meyering.net>1992-11-08 02:50:43 +0000
commitb25038ce9a234ea0906ddcbd8a0012e917e6c661 (patch)
treea4360f1b307910d9266f65fc851479c218219009
parentf33e06711c51330972e2adf07d21a4e69c8f44f6 (diff)
downloadcoreutils-b25038ce9a234ea0906ddcbd8a0012e917e6c661.tar.xz
Initial revision
-rw-r--r--COPYING339
-rw-r--r--lib/bcopy.c19
-rw-r--r--lib/linebuffer.c91
-rw-r--r--lib/linebuffer.h42
-rw-r--r--lib/memchr.c145
-rw-r--r--lib/memset.c29
-rw-r--r--lib/regex.c4870
-rw-r--r--lib/regex.h481
-rw-r--r--old/textutils/ChangeLog855
-rw-r--r--src/cat.c660
-rw-r--r--src/cksum.c274
-rw-r--r--src/comm.c221
-rw-r--r--src/csplit.c1308
-rw-r--r--src/cut.c586
-rw-r--r--src/expand.c377
-rw-r--r--src/fold.c250
-rw-r--r--src/head.c380
-rw-r--r--src/join.c690
-rw-r--r--src/nl.c546
-rw-r--r--src/od.c1697
-rw-r--r--src/paste.c458
-rw-r--r--src/pr.c1844
-rw-r--r--src/sort.c1746
-rw-r--r--src/split.c532
-rw-r--r--src/sum.c217
-rw-r--r--src/tac.c628
-rw-r--r--src/tail.c858
-rw-r--r--src/tr.c1813
-rw-r--r--src/unexpand.c432
-rw-r--r--src/uniq.c321
-rw-r--r--src/wc.c231
31 files changed, 22940 insertions, 0 deletions
diff --git a/COPYING b/COPYING
new file mode 100644
index 000000000..a43ea2126
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/lib/bcopy.c b/lib/bcopy.c
new file mode 100644
index 000000000..a8991c570
--- /dev/null
+++ b/lib/bcopy.c
@@ -0,0 +1,19 @@
+/* bcopy.c -- copy memory.
+   Copy LENGTH bytes from SOURCE to DEST.  Does not null-terminate.
+   The two regions may overlap: the copy direction is chosen from the
+   relative order of SOURCE and DEST so that no source byte is
+   overwritten before it has been read.
+   In the public domain.
+   By David MacKenzie <djm@gnu.ai.mit.edu>.  */
+
+void
+bcopy (source, dest, length)
+     char *source, *dest;
+     unsigned length;
+{
+  if (source == dest)
+    /* Same address: the data is already in place.  */
+    return;
+
+  if (source < dest)
+    {
+      /* DEST lies above SOURCE; walk backward from the ends.  */
+      char *from = source + length;
+      char *to = dest + length;
+
+      while (length-- != 0)
+        *--to = *--from;
+    }
+  else
+    {
+      /* DEST lies below SOURCE; walk forward from the starts.  */
+      unsigned i;
+
+      for (i = 0; i < length; i++)
+        dest[i] = source[i];
+    }
+}
diff --git a/lib/linebuffer.c b/lib/linebuffer.c
new file mode 100644
index 000000000..7f53aed70
--- /dev/null
+++ b/lib/linebuffer.c
@@ -0,0 +1,91 @@
+/* linebuffer.c -- read arbitrarily long lines
+ Copyright (C) 1986, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Richard Stallman. */
+
+#include <stdio.h>
+#include "linebuffer.h"
+
+char *xmalloc ();
+char *xrealloc ();
+void free ();
+
+/* Prepare LINEBUFFER for its first use: mark it as holding no text
+   and give it a freshly allocated buffer of 200 bytes obtained from
+   xmalloc.  */
+
+void
+initbuffer (linebuffer)
+     struct linebuffer *linebuffer;
+{
+  long initial_size = 200;
+
+  linebuffer->length = 0;
+  linebuffer->size = initial_size;
+  linebuffer->buffer = (char *) xmalloc (initial_size);
+}
+
+/* Read an arbitrarily long line of text from STREAM into LINEBUFFER,
+   doubling the buffer with xrealloc whenever it fills up.
+   Remove any newline.  Does not null terminate.
+   Return LINEBUFFER, except return 0 (with LENGTH set to 0) when
+   STREAM is already at end of file on entry, or when end of file or a
+   read error is met before any character of the line has been read.  */
+
+struct linebuffer *
+readline (linebuffer, stream)
+     struct linebuffer *linebuffer;
+     FILE *stream;
+{
+  int c;
+  char *buffer = linebuffer->buffer;
+  char *p = buffer;
+  char *end = buffer + linebuffer->size; /* Sentinel.  */
+
+  if (feof (stream))
+    {
+      linebuffer->length = 0;
+      return 0;
+    }
+
+  while (1)
+    {
+      c = getc (stream);
+      /* The check deliberately precedes the EOF/newline test, so the
+         buffer is also grown when a line exactly fills it.  */
+      if (p == end)
+        {
+          /* Remember the fill level as an offset: once xrealloc has
+             moved the block, the old pointer value may no longer be
+             used, not even in pointer arithmetic.  */
+          long used = p - buffer;
+
+          linebuffer->size *= 2;
+          buffer = (char *) xrealloc (buffer, linebuffer->size);
+          linebuffer->buffer = buffer;
+          p = buffer + used;
+          end = buffer + linebuffer->size;
+        }
+      if (c == EOF || c == '\n')
+        break;
+      *p++ = c;
+    }
+
+  /* getc returns EOF for a read error as well as for end of file.
+     Testing C rather than feof makes an error with nothing read
+     report "no line", so a caller looping until 0 cannot spin on a
+     stream that keeps failing.  */
+  if (c == EOF && p == buffer)
+    {
+      linebuffer->length = 0;
+      return 0;
+    }
+  linebuffer->length = p - buffer;
+  return linebuffer;
+}
+
+/* Release LINEBUFFER: first its text buffer, then the structure
+   itself.  Both must have been allocated with malloc.  */
+
+void
+freebuffer (linebuffer)
+     struct linebuffer *linebuffer;
+{
+  char *text = linebuffer->buffer;
+
+  free (text);
+  free (linebuffer);
+}
diff --git a/lib/linebuffer.h b/lib/linebuffer.h
new file mode 100644
index 000000000..13abe18c7
--- /dev/null
+++ b/lib/linebuffer.h
@@ -0,0 +1,42 @@
+/* linebuffer.h -- declarations for reading arbitrarily long lines
+ Copyright (C) 1986, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* A `struct linebuffer' holds a line of text.  The text is kept in
+   BUFFER without a terminating null; LENGTH says how much of it is
+   in use. */
+
+struct linebuffer
+{
+ long size; /* Number of bytes allocated for BUFFER. */
+ long length; /* Number of bytes of BUFFER in use. */
+ char *buffer; /* The text of the line; not null terminated. */
+};
+
+#ifdef __STDC__
+/* Initialize linebuffer LINEBUFFER for use. */
+void initbuffer (struct linebuffer *linebuffer);
+
+/* Read an arbitrarily long line of text from STREAM into LINEBUFFER.
+ Remove any newline. Does not null terminate.
+ Return LINEBUFFER, except at end of file return 0.
+ This prototype names FILE and this header includes nothing itself,
+ so <stdio.h> must be included before it. */
+struct linebuffer *readline (struct linebuffer *linebuffer, FILE *stream);
+
+/* Free linebuffer LINEBUFFER and its data, all allocated with malloc. */
+void freebuffer (struct linebuffer *);
+#else /* not __STDC__: old-style declarations without parameter lists */
+void initbuffer ();
+struct linebuffer *readline ();
+void freebuffer ();
+#endif /* not __STDC__ */
diff --git a/lib/memchr.c b/lib/memchr.c
new file mode 100644
index 000000000..cb8d4a22a
--- /dev/null
+++ b/lib/memchr.c
@@ -0,0 +1,145 @@
+/* Copyright (C) 1991 Free Software Foundation, Inc.
+ Based on strlen implementation by Torbjorn Granlund (tege@sics.se),
+ with help from Dan Sahlin (dan@sics.se) and
+ commentary by Jim Blandy (jimb@ai.mit.edu);
+ adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu),
+ and implemented by Roland McGrath (roland@ai.mit.edu).
+
+The GNU C Library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Library General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+The GNU C Library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Library General Public License for more details.
+
+You should have received a copy of the GNU Library General Public
+License along with the GNU C Library; see the file COPYING.LIB. If
+not, write to the Free Software Foundation, Inc., 675 Mass Ave,
+Cambridge, MA 02139, USA. */
+
+
+
+
+/* Search no more than N bytes of S for C (converted to unsigned char).
+   Return a pointer to the first matching byte, or 0 if none of the
+   first N bytes matches.
+
+   The bulk of the search examines one `unsigned long int' at a time.
+   Nothing here depends on that type being 4 bytes wide: the alignment
+   step, the stride and the bit masks are all derived from its actual
+   size, so the routine is also correct where it is 8 bytes.  Bytes
+   are assumed to be 8 bits.  */
+
+char *
+memchr(s, c, n)
+     unsigned char * s ;
+     int c ;
+     unsigned n;
+{
+  unsigned char *char_ptr;
+  unsigned long int *longword_ptr;
+  unsigned long int longword, ones, highs, charmask;
+
+  c = (unsigned char) c;
+
+  /* Handle the first few characters by reading one character at a time.
+     Do this until CHAR_PTR is aligned on a longword boundary.  */
+  for (char_ptr = s;
+       n > 0 && ((unsigned long int) char_ptr % sizeof (longword)) != 0;
+       --n, ++char_ptr)
+    if (*char_ptr == c)
+      return (char *) char_ptr;
+
+  longword_ptr = (unsigned long int *) char_ptr;
+
+  /* ONES has the value 1 in every byte (0x0101...01), HIGHS has the
+     top bit of every byte set (0x8080...80), and CHARMASK is a
+     longword each of whose bytes is C.  */
+  ones = ~(unsigned long int) 0 / 0xff;
+  highs = ones << 7;
+  charmask = ones * (unsigned long int) c;
+
+  /* Instead of the traditional loop which tests each character,
+     test a longword at a time.  XORing with CHARMASK turns each byte
+     that is C into a zero, so the problem becomes detecting a zero
+     byte.  Subtracting ONES borrows out of a zero byte and thereby
+     sets its top bit; `& ~longword' discards bytes whose top bit was
+     set to begin with.  The result is nonzero exactly when some byte
+     of LONGWORD is zero, so there are no misfires.
+
+     A longword is read only while at least a whole longword remains,
+     so no byte beyond the first N is ever touched.  */
+  while (n >= sizeof (longword))
+    {
+      longword = *longword_ptr ^ charmask;
+
+      if (((longword - ones) & ~longword & highs) != 0)
+        /* C is somewhere in this longword; let the byte loop below
+           find its exact position.  */
+        break;
+
+      ++longword_ptr;
+      n -= sizeof (longword);
+    }
+
+  /* Scan the remaining bytes: either the longword known to contain C,
+     or the tail that is shorter than a longword.  */
+  char_ptr = (unsigned char *) longword_ptr;
+
+  while (n-- > 0)
+    {
+      if (*char_ptr == c)
+        return (char *) char_ptr;
+      ++char_ptr;
+    }
+
+  return 0;
+}
diff --git a/lib/memset.c b/lib/memset.c
new file mode 100644
index 000000000..0e819f20e
--- /dev/null
+++ b/lib/memset.c
@@ -0,0 +1,29 @@
+/* memset.c -- set an area of memory to a given value
+ Copyright (C) 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Store C, converted to char, in each of the first LEN bytes of STR.
+   Return STR.  */
+
+char *
+memset (str, c, len)
+     char *str;
+     int c;
+     unsigned len;
+{
+  unsigned i;
+
+  for (i = 0; i < len; i++)
+    str[i] = c;
+
+  return str;
+}
diff --git a/lib/regex.c b/lib/regex.c
new file mode 100644
index 000000000..3129ed499
--- /dev/null
+++ b/lib/regex.c
@@ -0,0 +1,4870 @@
+/* Extended regular expression matching and search library,
+ version 0.11.
+ (Implements POSIX draft P10003.2/D11.2, except for
+ internationalization features.)
+
+ Copyright (C) 1985, 89, 90, 91, 92 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* AIX requires this to be the first thing in the file. */
+#if defined (_AIX) && !defined (REGEX_MALLOC)
+ #pragma alloca
+#endif
+
+#define _GNU_SOURCE
+
+/* We need this for `regex.h', and perhaps for the Emacs include files. */
+#include <sys/types.h>
+
+/* The `emacs' switch turns on certain matching commands
+ that make sense only in Emacs. */
+#ifdef emacs
+
+#include "config.h"
+#include "lisp.h"
+#include "buffer.h"
+#include "syntax.h"
+
+/* Emacs uses `NULL' as a predicate. */
+#undef NULL
+
+#else /* not emacs */
+
+/* We used to test for `BSTRING' here, but only GCC and Emacs define
+ `BSTRING', as far as I know, and neither of them use this code. */
+#if USG || STDC_HEADERS
+#include <string.h>
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+#define bcopy(s, d, n) memcpy ((d), (s), (n))
+#define bzero(s, n) memset ((s), 0, (n))
+#else
+#include <strings.h>
+#endif
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+
+/* Define the syntax stuff for \<, \>, etc.  */
+
+/* Syntax code stored for word-constituent characters.  It must be
+   nonzero for the wordchar and notwordchar pattern commands in
+   re_match_2.  */
+#ifndef Sword
+#define Sword 1
+#endif
+
+#ifdef SYNTAX_TABLE
+
+extern char *re_syntax_table;
+
+#else /* not SYNTAX_TABLE */
+
+/* How many characters in the character set.  */
+#define CHAR_SET_SIZE 256
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+/* Build `re_syntax_table' on the first call; later calls do nothing.
+   Characters in the ranges a-z, A-Z and 0-9, plus `_', get the syntax
+   code Sword; every other entry is zero.  */
+
+static void
+init_syntax_once ()
+{
+  static int initialized = 0;
+  register int ch;
+
+  if (!initialized)
+    {
+      bzero (re_syntax_table, sizeof re_syntax_table);
+
+      for (ch = 0; ch < CHAR_SET_SIZE; ch++)
+        {
+          if ((ch >= 'a' && ch <= 'z')
+              || (ch >= 'A' && ch <= 'Z')
+              || (ch >= '0' && ch <= '9')
+              || ch == '_')
+            re_syntax_table[ch] = Sword;
+        }
+
+      initialized = 1;
+    }
+}
+
+#endif /* not SYNTAX_TABLE */
+
+/* Look up the syntax code of character C.  C is used directly as the
+   table index.  */
+#define SYNTAX(c) re_syntax_table[c]
+
+#endif /* not emacs */
+
+/* Get the interface, including the syntax bits. */
+#include "regex.h"
+
+
+/* isalpha etc. are used for the character classes. */
+#include <ctype.h>
+#ifndef isgraph
+#define isgraph(c) (isprint (c) && !isspace (c))
+#endif
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+ since ours (we hope) works properly with all combinations of
+ machines, compilers, `char' and `unsigned char' argument types.
+ (Per Bothner suggested the basic approach.)
+
+ SIGN_EXTEND_CHAR (C) yields C's byte value as a signed quantity.
+ Under __STDC__ this is a cast to `signed char'. Otherwise C is
+ converted to `unsigned char', bit 7 is flipped and 128 is subtracted,
+ so 0..127 are unchanged and 128..255 become -128..-1. */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else
+/* As in Harbison and Steele. */
+#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ use `alloca' instead of `malloc'. This is because using malloc in
+ re_search* or re_match* could cause memory leaks when C-g is used in
+ Emacs; also, malloc is slower and causes storage fragmentation. On
+ the other hand, malloc is more portable, and easier to debug.
+
+ Because we sometimes use alloca, some routines have to be macros,
+ not functions -- `alloca'-allocated space disappears at the end of the
+ function it is called in.
+
+ REGEX_ALLOCATE (SIZE) yields a block of SIZE bytes.
+ REGEX_REALLOCATE (SOURCE, OSIZE, NSIZE) yields a block of NSIZE bytes
+ that starts with the contents of SOURCE. */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE malloc
+/* OSIZE is not used in this variant; the whole job is left to realloc. */
+#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+
+#else /* not REGEX_MALLOC */
+
+/* Emacs already defines alloca, sometimes. */
+#ifndef alloca
+
+/* Make alloca work the best possible way. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#ifndef _AIX /* Already did AIX, up at the top. */
+char *alloca ();
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#endif /* not alloca */
+
+#define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable. Allocates NSIZE bytes with
+ alloca, copies the first OSIZE bytes of SOURCE into them and yields
+ the new block. SOURCE itself is left untouched. */
+#define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = (char *) alloca (nsize), \
+ bcopy (source, destination, osize), \
+ destination)
+
+#endif /* not REGEX_MALLOC */
+
+
+/* True if `size1' is nonzero and PTR points anywhere inside `string1'
+ or just past its end. `size1' and `string1' are not macro arguments;
+ they are taken from the scope in which the macro is expanded. This
+ works if PTR is NULL, which is a good thing. */
+#define FIRST_STRING_P(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc, or fail. TALLOC and
+ RETALLOC yield the malloc/realloc result cast to `T *', so a null
+ result means the allocation failed. RETALLOC also assigns that
+ result to ADDR, so after a failed RETALLOC, ADDR no longer points at
+ the old block. REGEX_TALLOC goes through REGEX_ALLOCATE instead
+ (malloc or alloca, depending on REGEX_MALLOC). */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits. */
+
+/* Nonzero if the strings S1 and S2 compare equal under strcmp. */
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+/* Larger/smaller of A and B. The selected argument is evaluated
+ twice, so avoid arguments with side effects. */
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/* Truth values as used throughout this file. */
+typedef char boolean;
+#define false 0
+#define true 1
+
+/* These are the command codes that appear in compiled regular
+ expressions. Some opcodes are followed by argument bytes. A
+ command code can specify any interpretation whatsoever for its
+ arguments. Zero bytes may appear in the compiled regular expression.
+
+ The value of `exactn' is needed in search.c (search_buffer) in Emacs.
+ So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
+ `exactn' we use here must also be 1.
+
+ Two-byte arguments are written with STORE_NUMBER and read back with
+ EXTRACT_NUMBER: low-order byte first, then the high-order byte. */
+
+typedef enum
+{
+ no_op = 0,
+
+ /* Followed by one byte giving n, then by n literal bytes. */
+ exactn = 1,
+
+ /* Matches any (more or less) character. */
+ anychar,
+
+ /* Matches any one char belonging to specified set. First
+ following byte is number of bitmap bytes. Then come bytes
+ for a bitmap saying which chars are in. Bits in each byte
+ are ordered low-bit-first. A character is in the set if its
+ bit is 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ charset,
+
+ /* Same parameters as charset, but match any character that is
+ not one of those specified. */
+ charset_not,
+
+ /* Start remembering the text that is matched, for storing in a
+ register. Followed by one byte with the register number, in
+ the range 0 to one less than the pattern buffer's re_nsub
+ field. Then followed by one byte with the number of groups
+ inner to this one. (This last has to be part of the
+ start_memory only because we need it in the on_failure_jump
+ of re_match_2.) */
+ start_memory,
+
+ /* Stop remembering the text that is matched and store it in a
+ memory register. Followed by one byte with the register
+ number, in the range 0 to one less than `re_nsub' in the
+ pattern buffer, and one byte with the number of inner groups,
+ just like `start_memory'. (We need the number of inner
+ groups here because we don't have any easy way of finding the
+ corresponding start_memory when we're at a stop_memory.) */
+ stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the register number. */
+ duplicate,
+
+ /* Fail unless at beginning of line. */
+ begline,
+
+ /* Fail unless at end of line. */
+ endline,
+
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning
+ of string to be matched (if not). */
+ begbuf,
+
+ /* Analogously, for end of buffer/string. */
+ endbuf,
+
+ /* Followed by two-byte relative address to which to jump. */
+ jump,
+
+ /* Same as jump, but marks the end of an alternative. */
+ jump_past_alt,
+
+ /* Followed by two-byte relative address of place to resume at
+ in case of failure. */
+ on_failure_jump,
+
+ /* Like on_failure_jump, but pushes a placeholder instead of the
+ current string position when executed. */
+ on_failure_keep_string_jump,
+
+ /* Throw away latest failure point and then jump to following
+ two-byte relative address. */
+ pop_failure_jump,
+
+ /* Change to pop_failure_jump if we know we won't have to
+ backtrack to match; otherwise change to jump. This is used to
+ jump back to the beginning of a repeat. If what follows this
+ jump clearly won't match what the repeat does, such that we can
+ be sure that there is no use backtracking out of repetitions
+ already matched, then we change it to a pop_failure_jump.
+ Followed by two-byte address. */
+ maybe_pop_jump,
+
+ /* Jump to following two-byte address, and push a dummy failure
+ point. This failure point will be thrown away if an attempt
+ is made to use it for a failure. A `+' construct makes this
+ before the first repeat. Also used as an intermediary kind
+ of jump when compiling an alternative. */
+ dummy_failure_jump,
+
+ /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
+ push_dummy_failure,
+
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
+ succeed_n,
+
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ jump_n,
+
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
+ set_number_at,
+
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
+
+ /* The Emacs-only opcodes below start with the separating comma, so
+ that the list has no trailing comma when `emacs' is not defined. */
+#ifdef emacs
+ ,before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+
+ /* Matches any character whose syntax is specified. Followed by
+ a byte which contains a syntax code, e.g., Sword. */
+ syntaxspec,
+
+ /* Matches any character whose syntax is not that specified. */
+ notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern. */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION: the
+ low-order eight bits go in the first byte and NUMBER >> 8 in the
+ second. Both arguments are evaluated twice.
+ NOTE(review): for a negative NUMBER the second byte depends on how
+ the compiler implements `>>' on signed values -- confirm on new
+ targets. */
+
+#define STORE_NUMBER(destination, number) \
+ do { \
+ (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; \
+ } while (0)
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+ the byte after where the number is stored. Therefore, DESTINATION
+ must be an lvalue. */
+
+#define STORE_NUMBER_AND_INCR(destination, number) \
+ do { \
+ STORE_NUMBER (destination, number); \
+ (destination) += 2; \
+ } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+   at SOURCE: the first byte supplies the low-order eight bits and the
+   second byte, sign-extended, supplies the high-order part.  SOURCE is
+   evaluated twice and DESTINATION is assigned twice.
+
+   The high byte is scaled by multiplication rather than by `<< 8':
+   offsets can point backwards, so the sign-extended byte may be
+   negative, and left-shifting a negative value is undefined in ISO C.  */
+
+#define EXTRACT_NUMBER(destination, source) \
+  do { \
+    (destination) = *(source) & 0377; \
+    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * 256; \
+  } while (0)
+
+#ifdef DEBUG
+/* Function form of EXTRACT_NUMBER, for use when debugging.  Store in
+   *DEST the number held in the two bytes at SOURCE: SOURCE[0] is the
+   low-order byte and SOURCE[1], sign-extended, is the high-order
+   part.  */
+static void
+extract_number (dest, source)
+     int *dest;
+     unsigned char *source;
+{
+  int high = SIGN_EXTEND_CHAR (*(source + 1));
+
+  /* Multiply instead of shifting: HIGH is negative for backward
+     offsets, and left-shifting a negative int is undefined.  */
+  *dest = (*source & 0377) + high * 256;
+}
+
+#ifndef EXTRACT_MACROS /* To debug the macros. */
+#undef EXTRACT_NUMBER
+#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number
+ (two bytes further on). SOURCE must be an lvalue. */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+ do { \
+ EXTRACT_NUMBER (destination, source); \
+ (source) += 2; \
+ } while (0)
+
+#ifdef DEBUG
+/* Function form of EXTRACT_NUMBER_AND_INCR: read the two-byte number at
+   *SOURCE into *DESTINATION via extract_number, then move *SOURCE past
+   those two bytes.  */
+static void
+extract_number_and_incr (destination, source)
+     int *destination;
+     unsigned char **source;
+{
+  unsigned char *operand = *source;
+
+  extract_number (destination, operand);
+  *source = operand + 2;
+}
+
+#ifndef EXTRACT_MACROS
+#undef EXTRACT_NUMBER_AND_INCR
+#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+  extract_number_and_incr (&dest, &src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+ it is doing (if the variable `debug' is nonzero). If linked with the
+ main program in `iregex.c', you can enter patterns and strings
+ interactively. And if linked with the main program in `main.c' and
+ the other test files, you can run the already-written tests. */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging. */
+#include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging. */
+#include <assert.h>
+
+/* Debug output is produced only while this is nonzero. */
+static int debug = 0;
+
+/* DEBUG_STATEMENT (E) expands to E itself. The DEBUG_PRINT* macros
+ expand to a bare `if (debug) ...' statement with no enclosing braces
+ or `do ... while (0)', so an `else' written directly after one of
+ them would pair with that hidden `if'. */
+#define DEBUG_STATEMENT(e) e
+#define DEBUG_PRINT1(x) if (debug) printf (x)
+#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+/* The first argument, P, is not used. */
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (debug) print_partial_compiled_pattern (s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+/* Defined outside this file; the printing routines below call it once
+ per character to be shown. */
+extern void printchar ();
+
+/* Print the fastmap in human-readable form: each run of consecutive
+   nonzero entries is shown as its first character, followed by `-' and
+   its last character when the run is longer than one.  The output ends
+   with a newline.  */
+
+void
+print_fastmap (fastmap)
+     char *fastmap;
+{
+  unsigned limit = 1 << BYTEWIDTH;
+  unsigned pos = 0;
+
+  while (pos < limit)
+    {
+      unsigned first;
+
+      if (!fastmap[pos])
+        {
+          pos++;
+          continue;
+        }
+
+      /* POS starts a run of set entries; find where the run ends.  */
+      first = pos;
+      printchar (first);
+
+      for (pos++; pos < limit && fastmap[pos]; pos++)
+        ;
+
+      if (pos - first > 1)
+        {
+          printf ("-");
+          printchar (pos - 1);
+        }
+    }
+
+  putchar ('\n');
+}
+
+
+/* Print a compiled pattern string in human-readable form, starting at
+   the START pointer into it and ending just before the pointer END.
+
+   Each command is printed as `/NAME', followed by its arguments:
+   one-byte arguments as `/VALUE' and two-byte arguments as `/0/VALUE'.
+   An opcode byte that is not recognized is printed as `?' and its
+   decimal value.  The output ends with "/" and a newline.  A null START
+   prints "(null)" and nothing else.  */
+
+void
+print_partial_compiled_pattern (start, end)
+     unsigned char *start;
+     unsigned char *end;
+{
+  int mcnt, mcnt2;
+  unsigned char *p = start;
+  unsigned char *pend = end;
+
+  if (start == NULL)
+    {
+      printf ("(null)\n");
+      return;
+    }
+
+  /* Loop over pattern commands.  */
+  while (p < pend)
+    {
+      switch ((re_opcode_t) *p++)
+        {
+        case no_op:
+          printf ("/no_op");
+          break;
+
+        case exactn:
+          /* One count byte, then that many literal bytes.  */
+          mcnt = *p++;
+          printf ("/exactn/%d", mcnt);
+          do
+            {
+              putchar ('/');
+              printchar (*p++);
+            }
+          while (--mcnt);
+          break;
+
+        case start_memory:
+          mcnt = *p++;
+          printf ("/start_memory/%d/%d", mcnt, *p++);
+          break;
+
+        case stop_memory:
+          mcnt = *p++;
+          printf ("/stop_memory/%d/%d", mcnt, *p++);
+          break;
+
+        case duplicate:
+          printf ("/duplicate/%d", *p++);
+          break;
+
+        case anychar:
+          printf ("/anychar");
+          break;
+
+        case charset:
+        case charset_not:
+          {
+            register int c;
+
+            printf ("/charset%s",
+                    (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
+
+            /* *P is the number of bitmap bytes that follow it.  */
+            assert (p + *p < pend);
+
+            for (c = 0; c < *p; c++)
+              {
+                unsigned bit;
+                unsigned char map_byte = p[1 + c];
+
+                putchar ('/');
+
+                for (bit = 0; bit < BYTEWIDTH; bit++)
+                  if (map_byte & (1 << bit))
+                    printchar (c * BYTEWIDTH + bit);
+              }
+            p += 1 + *p;
+            break;
+          }
+
+        case begline:
+          printf ("/begline");
+          break;
+
+        case endline:
+          printf ("/endline");
+          break;
+
+        case on_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/on_failure_jump/0/%d", mcnt);
+          break;
+
+        case on_failure_keep_string_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/on_failure_keep_string_jump/0/%d", mcnt);
+          break;
+
+        case dummy_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/dummy_failure_jump/0/%d", mcnt);
+          break;
+
+        case push_dummy_failure:
+          printf ("/push_dummy_failure");
+          break;
+
+        case maybe_pop_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/maybe_pop_jump/0/%d", mcnt);
+          break;
+
+        case pop_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/pop_failure_jump/0/%d", mcnt);
+          break;
+
+        case jump_past_alt:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/jump_past_alt/0/%d", mcnt);
+          break;
+
+        case jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/jump/0/%d", mcnt);
+          break;
+
+        case succeed_n:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case jump_n:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case set_number_at:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case wordbound:
+          printf ("/wordbound");
+          break;
+
+        case notwordbound:
+          printf ("/notwordbound");
+          break;
+
+        case wordbeg:
+          printf ("/wordbeg");
+          break;
+
+        case wordend:
+          printf ("/wordend");
+          /* Without this `break' control fell into the next case and a
+             second, bogus opcode name was printed after `/wordend'.  */
+          break;
+
+#ifdef emacs
+        case before_dot:
+          printf ("/before_dot");
+          break;
+
+        case at_dot:
+          printf ("/at_dot");
+          break;
+
+        case after_dot:
+          printf ("/after_dot");
+          break;
+
+        case syntaxspec:
+          printf ("/syntaxspec");
+          mcnt = *p++;
+          printf ("/%d", mcnt);
+          break;
+
+        case notsyntaxspec:
+          printf ("/notsyntaxspec");
+          mcnt = *p++;
+          printf ("/%d", mcnt);
+          break;
+#endif /* emacs */
+
+        case wordchar:
+          printf ("/wordchar");
+          break;
+
+        case notwordchar:
+          printf ("/notwordchar");
+          break;
+
+        case begbuf:
+          printf ("/begbuf");
+          break;
+
+        case endbuf:
+          printf ("/endbuf");
+          break;
+
+        default:
+          printf ("?%d", *(p-1));
+          break;
+        }
+    }
+  printf ("/\n");
+}
+
+
+/* Print everything of interest about the pattern buffer BUFP: the
+ compiled pattern itself (via print_partial_compiled_pattern), the
+ used and allocated byte counts, the fastmap when `fastmap_accurate'
+ is set and a fastmap is present, and then the remaining fields. */
+void
+print_compiled_pattern (bufp)
+ struct re_pattern_buffer *bufp;
+{
+ unsigned char *buffer = bufp->buffer;
+
+ print_partial_compiled_pattern (buffer, buffer + bufp->used);
+ /* NOTE(review): every field below is printed with `%d'. The field
+ types are declared in regex.h, which is not visible here -- confirm
+ that each one really is passed as an `int'. */
+ printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
+
+ if (bufp->fastmap_accurate && bufp->fastmap)
+ {
+ printf ("fastmap: ");
+ print_fastmap (bufp->fastmap);
+ }
+
+ printf ("re_nsub: %d\t", bufp->re_nsub);
+ printf ("regs_alloc: %d\t", bufp->regs_allocated);
+ printf ("can_be_null: %d\t", bufp->can_be_null);
+ printf ("newline_anchor: %d\n", bufp->newline_anchor);
+ printf ("no_sub: %d\t", bufp->no_sub);
+ printf ("not_bol: %d\t", bufp->not_bol);
+ printf ("not_eol: %d\t", bufp->not_eol);
+ printf ("syntax: %d\n", bufp->syntax);
+ /* Perhaps we should print the translate table? */
+}
+
+
+/* Print, one character at a time through printchar, the text that runs
+   from WHERE to the end of the two-part string made of STRING1 (SIZE1
+   chars) followed by STRING2 (SIZE2 chars).  If WHERE lies in STRING1
+   according to FIRST_STRING_P, the rest of STRING1 is printed and then
+   all of STRING2; otherwise only STRING2 from WHERE on.  A null WHERE
+   prints "(null)".  */
+
+void
+print_double_string (where, string1, size1, string2, size2)
+     const char *where;
+     const char *string1;
+     const char *string2;
+     int size1;
+     int size2;
+{
+  unsigned idx;
+
+  if (where == NULL)
+    {
+      printf ("(null)");
+      return;
+    }
+
+  if (FIRST_STRING_P (where))
+    {
+      idx = where - string1;
+      while (idx < size1)
+        printchar (string1[idx++]);
+
+      /* Continue from the very start of the second string.  */
+      where = string2;
+    }
+
+  idx = where - string2;
+  while (idx < size2)
+    printchar (string2[idx++]);
+}
+
+#else /* not DEBUG */
+
+/* Without DEBUG, `assert' and all the debugging macros expand to
+ nothing, so their arguments are never evaluated. */
+#undef assert
+#define assert(e)
+
+#define DEBUG_STATEMENT(e)
+#define DEBUG_PRINT1(x)
+#define DEBUG_PRINT2(x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+
+/* The regexp syntax currently recognized.  `re_set_syntax' stores into
+   it, and it can also be assigned to arbitrarily: each pattern buffer
+   stores its own syntax, so it can be changed between regex
+   compilations.  */
+reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
+
+
+/* Specify the precise syntax of regexps for compilation.  This provides
+   for compatibility for various utilities which historically have
+   different, incompatible syntaxes.
+
+   The argument SYNTAX is a bit mask comprised of the various bits
+   defined in regex.h.  It becomes the new value of
+   `re_syntax_options'; the value it replaces is returned.  */
+
+reg_syntax_t
+re_set_syntax (syntax)
+     reg_syntax_t syntax;
+{
+  reg_syntax_t previous;
+
+  previous = re_syntax_options;
+  re_syntax_options = syntax;
+
+  return previous;
+}
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. Obviously the order here has to be same as there: the
+ table is meant to be indexed by the error code. The first entry
+ (REG_NOERROR) is a null pointer, not a string. */
+
+static const char *re_error_msg[] =
+ { NULL, /* REG_NOERROR */
+ "No match", /* REG_NOMATCH */
+ "Invalid regular expression", /* REG_BADPAT */
+ "Invalid collation character", /* REG_ECOLLATE */
+ "Invalid character class name", /* REG_ECTYPE */
+ "Trailing backslash", /* REG_EESCAPE */
+ "Invalid back reference", /* REG_ESUBREG */
+ "Unmatched [ or [^", /* REG_EBRACK */
+ "Unmatched ( or \\(", /* REG_EPAREN */
+ "Unmatched \\{", /* REG_EBRACE */
+ "Invalid content of \\{\\}", /* REG_BADBR */
+ "Invalid range end", /* REG_ERANGE */
+ "Memory exhausted", /* REG_ESPACE */
+ "Invalid preceding regular expression", /* REG_BADRPT */
+ "Premature end of regular expression", /* REG_EEND */
+ "Regular expression too big", /* REG_ESIZE */
+ "Unmatched ) or \\)", /* REG_ERPAREN */
+ };
+
+/* Subroutine declarations and macros for regex_compile. The macros
+ below refer to regex_compile's local variables (`p', `pend',
+ `translate') by name, so they can only be used where those names are
+ in scope. */
+
+static void store_op1 (), store_op2 ();
+static void insert_op1 (), insert_op2 ();
+static boolean at_begline_loc_p (), at_endline_loc_p ();
+static boolean group_in_compile_stack ();
+static reg_errcode_t compile_range ();
+
+/* Fetch the next character in the uncompiled pattern---translating it
+ if necessary. Also cast from a signed character in the constant
+ string passed to us by the user to an unsigned char that we can use
+ as an array index (in, e.g., `translate'). If the pattern is
+ already exhausted, this makes the enclosing function return
+ REG_EEND. */
+#define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ if (translate) c = translate[c]; \
+ } while (0)
+
+/* Fetch the next character in the uncompiled pattern, with no
+ translation. Like PATFETCH, returns REG_EEND from the enclosing
+ function at the end of the pattern. */
+#define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ } while (0)
+
+/* Go backwards one character in the pattern. */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D. We
+ cast the subscript to translate because some data is declared as
+ `char *', to avoid warnings when a string constant is passed. But
+ when we use a character as a subscript we must make it unsigned. */
+#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+
+
+/* Macros for outputting the compiled pattern into `buffer'. They use
+ the variables `b' (the append position) and `bufp' (the pattern
+ buffer) from the scope in which they are expanded. */
+
+/* If the buffer isn't allocated when it comes in, use this. */
+#define INIT_BUF_SIZE 32
+
+/* Make sure we have at least N more bytes of space in buffer. This
+ expands to a bare `while' loop around EXTEND_BUFFER, which can make
+ the enclosing function return an error code. */
+#define GET_BUFFER_SPACE(n) \
+ while (b - bufp->buffer + (n) > bufp->allocated) \
+ EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it. */
+#define BUF_PUSH(c) \
+ do { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (unsigned char) (c); \
+ } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
+#define BUF_PUSH_2(c1, c2) \
+ do { \
+ GET_BUFFER_SPACE (2); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes. */
+#define BUF_PUSH_3(c1, c2, c3) \
+ do { \
+ GET_BUFFER_SPACE (3); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ *b++ = (unsigned char) (c3); \
+ } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO. We store a
+ relative address offset by the three bytes the jump itself occupies.
+ LOC is evaluated twice. */
+#define STORE_JUMP(op, loc, to) \
+ store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump. */
+#define STORE_JUMP2(op, loc, to, arg) \
+ store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP(op, loc, to) \
+ insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP2(op, loc, to, arg) \
+ insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+ into the pattern are two bytes long. So if 2^16 bytes turns out to
+ be too small, many things would have to change. */
+#define MAX_BUF_SIZE (1L << 16)
+
+
+/* Extend the buffer to twice its current size (but to no more than
+ MAX_BUF_SIZE) via realloc and reset the pointers that pointed into
+ the old block to point to the correct places in the new one. If the
+ buffer is already MAX_BUF_SIZE bytes long, make the enclosing
+ function return REG_ESIZE; if realloc fails, make it return
+ REG_ESPACE.
+
+ Uses the variables `bufp', `b', `begalt', `fixup_alt_jump',
+ `laststart' and `pending_exact' of the enclosing function.
+
+ NOTE(review): the realloc result is stored straight into
+ `bufp->buffer', so on failure that field is null and the old block is
+ no longer referenced from BUFP -- confirm that callers do not expect
+ to free it. */
+#define EXTEND_BUFFER() \
+ do { \
+ unsigned char *old_buffer = bufp->buffer; \
+ if (bufp->allocated == MAX_BUF_SIZE) \
+ return REG_ESIZE; \
+ bufp->allocated <<= 1; \
+ if (bufp->allocated > MAX_BUF_SIZE) \
+ bufp->allocated = MAX_BUF_SIZE; \
+ bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
+ if (bufp->buffer == NULL) \
+ return REG_ESPACE; \
+ /* If the buffer moved, move all the pointers into it. */ \
+ if (old_buffer != bufp->buffer) \
+ { \
+ b = (b - old_buffer) + bufp->buffer; \
+ begalt = (begalt - old_buffer) + bufp->buffer; \
+ if (fixup_alt_jump) \
+ fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+ if (laststart) \
+ laststart = (laststart - old_buffer) + bufp->buffer; \
+ if (pending_exact) \
+ pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
+ } \
+ } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+ {start,stop}_memory, the maximum number of groups we can report
+ things about is what fits in that byte. */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers. We just
+ ignore the excess. */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack. */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+ be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
+ NOTE(review): with MAX_BUF_SIZE at 2^16 that range does not fit in a
+ 16-bit `int' -- confirm for targets with a narrow int. */
+typedef int pattern_offset_t;
+
+/* One entry of the compile stack, which regex_compile uses to keep
+ track of unclosed groups. The first four fields are offsets and the
+ last is a register (group) number; presumably they save the
+ same-named state of regex_compile while a group is open -- verify
+ against the part of regex_compile that pushes and pops entries. */
+typedef struct
+{
+ pattern_offset_t begalt_offset;
+ pattern_offset_t fixup_alt_jump;
+ pattern_offset_t inner_group_offset;
+ pattern_offset_t laststart_offset;
+ regnum_t regnum;
+} compile_stack_elt_t;
+
+
+/* The stack itself: `stack' points to `size' entries, of which the
+ first `avail' are in use. */
+typedef struct
+{
+ compile_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} compile_stack_type;
+
+
+/* Number of entries allocated for the stack at the start of a compilation. */
+#define INIT_COMPILE_STACK_SIZE 32
+
+/* These refer to a variable named `compile_stack' in the scope where
+ they are used. */
+#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+/* The next available element. */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in a list.  The list is the bitmap that
+   `b' points to (`b' comes from the scope where the macro is used):
+   C, reduced to an unsigned char, selects byte C / BYTEWIDTH and bit
+   C % BYTEWIDTH within it.  C is evaluated twice.  Both uses of C are
+   parenthesized so that the cast applies to the whole argument even
+   when C is an expression.  */
+#define SET_LIST_BIT(c) \
+  (b[((unsigned char) (c)) / BYTEWIDTH] \
+   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
+
+
+/* Get the next unsigned number in the uncompiled pattern. Digits are
+ accumulated into NUM in decimal; a negative NUM is reset to zero
+ when the first digit is seen and is left unchanged if there are no
+ digits. On exit `c' holds the last character fetched, which is the
+ first non-digit unless the pattern ended first.
+
+ Uses `p', `pend' and `c' from the enclosing scope, and PATFETCH, so
+ it can return REG_EEND from the enclosing function. This expands to
+ a brace block rather than `do ... while (0)', and NUM is used
+ unparenthesized, so pass a plain variable. */
+#define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while (isdigit (c)) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
+
+/* Length of the longest class name accepted by IS_CHAR_CLASS. */
+#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+/* Nonzero if STRING is exactly one of the twelve character class names
+ listed here. STRING is evaluated once per comparison made. */
+#define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is set to zero;
+ `re_nsub' is set to the number of groups in PATTERN;
+ `not_bol' and `not_eol' are set to zero.
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+static reg_errcode_t
+regex_compile (pattern, size, syntax, bufp)
+ const char *pattern;
+ int size;
+ reg_syntax_t syntax;
+ struct re_pattern_buffer *bufp;
+{
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register unsigned char c, c1;
+
+ /* A random tempory spot in PATTERN. */
+ const char *p1;
+
+ /* Points to the end of the buffer, where we should append. */
+ register unsigned char *b;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+ const char *p = pattern;
+ const char *pend = pattern + size;
+
+ /* How to translate the characters in the pattern. */
+ char *translate = bufp->translate;
+
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell if a new exact-match
+ character can be added to that command or if the character requires
+ a new `exactn' command. */
+ unsigned char *pending_exact = 0;
+
+ /* Address of start of the most recently finished expression.
+ This tells, e.g., postfix * where to find the start of its
+ operand. Reset at the beginning of groups and alternatives. */
+ unsigned char *laststart = 0;
+
+ /* Address of beginning of regexp, or inside of last group. */
+ unsigned char *begalt;
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ which to go back if the interval is invalid. */
+ const char *beg_interval;
+
+ /* Address of the place where a forward jump should go to the end of
+ the containing expression. Each alternative of an `or' -- except the
+ last -- ends with a forward jump of this sort. */
+ unsigned char *fixup_alt_jump = 0;
+
+ /* Counts open-groups as they are encountered. Remembered for the
+ matching close-group on the compile stack, so the same register
+ number is put in the stop_memory as the start_memory. */
+ regnum_t regnum = 0;
+
+#ifdef DEBUG
+ DEBUG_PRINT1 ("\nCompiling pattern: ");
+ if (debug)
+ {
+ unsigned debug_count;
+
+ for (debug_count = 0; debug_count < size; debug_count++)
+ printchar (pattern[debug_count]);
+ putchar ('\n');
+ }
+#endif /* DEBUG */
+
+ /* Initialize the compile stack. */
+ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+ if (compile_stack.stack == NULL)
+ return REG_ESPACE;
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ bufp->syntax = syntax;
+ bufp->fastmap_accurate = 0;
+ bufp->not_bol = bufp->not_eol = 0;
+
+ /* Set `used' to zero, so that if we return an error, the pattern
+ printer (for debugging) will think there's no pattern. We reset it
+ at the end. */
+ bufp->used = 0;
+
+ /* Always count groups, whether or not bufp->no_sub is set. */
+ bufp->re_nsub = 0;
+
+#if !defined (emacs) && !defined (SYNTAX_TABLE)
+ /* Initialize the syntax table. */
+ init_syntax_once ();
+#endif
+
+ if (bufp->allocated == 0)
+ {
+ if (bufp->buffer)
+ { /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. */
+ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
+ }
+ else
+ { /* Caller did not allocate a buffer. Do it for them. */
+ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
+ }
+ if (!bufp->buffer) return REG_ESPACE;
+
+ bufp->allocated = INIT_BUF_SIZE;
+ }
+
+ begalt = b = bufp->buffer;
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ BUF_PUSH (begline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p (p, pend, syntax))
+ BUF_PUSH (endline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* Are we optimizing this jump? */
+ boolean keep_string_p = false;
+
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH (c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+ ;
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ and also whether or not two or more matches is allowed. */
+ if (many_times_ok)
+ { /* More than one repetition is allowed, so put in at the
+ end a backward relative jump from `b' to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump).
+
+ But if we are at the `*' in the exact sequence `.*\n',
+ insert an unconditional jump backwards to the .,
+ instead of the beginning of the loop. This way we only
+ push a failure point once, instead of every time
+ through the loop. */
+ assert (p - 1 > pattern);
+
+ /* Allocate the space for the jump. */
+ GET_BUFFER_SPACE (3);
+
+ /* We know we are not at the first character of the pattern,
+ because laststart was nonzero. And we've already
+ incremented `p', by the way, to be the character after
+ the `*'. Do we have to do something analogous here
+ for null bytes, because of RE_DOT_NOT_NULL? */
+ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+ && !(syntax & RE_DOT_NEWLINE))
+ { /* We have .*\n. */
+ STORE_JUMP (jump, b, laststart);
+ keep_string_p = true;
+ }
+ else
+ /* Anything else. */
+ STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+
+ /* We've added more stuff to the buffer. */
+ b += 3;
+ }
+
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+ : on_failure_jump,
+ laststart, b + 3);
+ pending_exact = 0;
+ b += 3;
+
+ if (!zero_times_ok)
+ {
+ /* At least one repetition is required, so insert a
+ `dummy_failure_jump' before the initial
+ `on_failure_jump' instruction of the loop. This
+ effects a skip over that instruction the first time
+ we hit that loop. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
+ b += 3;
+ }
+ }
+ break;
+
+
+ case '.':
+ laststart = b;
+ BUF_PUSH (anychar);
+ break;
+
+
+ case '[':
+ {
+ boolean had_char_class = false;
+
+ if (p == pend) return REG_EBRACK;
+
+ /* Ensure that we have enough space to push a charset: the
+ opcode, the length count, and the bitset; 34 bytes in all. */
+ GET_BUFFER_SPACE (34);
+
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Push the number of bytes in the bitmap. */
+ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* Clear the whole map. */
+ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-2] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ SET_LIST_BIT ('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) return REG_EBRACK;
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ SET_LIST_BIT (c1);
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ return REG_ERANGE;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret
+ = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) return REG_EBRACK;
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if (c == ':' || c == ']' || p == pend
+ || c1 == CHAR_CLASS_MAX_LENGTH)
+ break;
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+
+ /* If it isn't a word bracketed by `[:' and `:]':
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but set bits for them). */
+ if (c == ':' && *p == ']')
+ {
+ int ch;
+ boolean is_alnum = STREQ (str, "alnum");
+ boolean is_alpha = STREQ (str, "alpha");
+ boolean is_blank = STREQ (str, "blank");
+ boolean is_cntrl = STREQ (str, "cntrl");
+ boolean is_digit = STREQ (str, "digit");
+ boolean is_graph = STREQ (str, "graph");
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_print = STREQ (str, "print");
+ boolean is_punct = STREQ (str, "punct");
+ boolean is_space = STREQ (str, "space");
+ boolean is_upper = STREQ (str, "upper");
+ boolean is_xdigit = STREQ (str, "xdigit");
+
+ if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) return REG_EBRACK;
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+ {
+ if ( (is_alnum && isalnum (ch))
+ || (is_alpha && isalpha (ch))
+ || (is_blank && isblank (ch))
+ || (is_cntrl && iscntrl (ch))
+ || (is_digit && isdigit (ch))
+ || (is_graph && isgraph (ch))
+ || (is_lower && islower (ch))
+ || (is_print && isprint (ch))
+ || (is_punct && ispunct (ch))
+ || (is_space && isspace (ch))
+ || (is_upper && isupper (ch))
+ || (is_xdigit && isxdigit (ch)))
+ SET_LIST_BIT (ch);
+ }
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ SET_LIST_BIT (c);
+ }
+ }
+
+ /* Discard any (non)matching list bytes that are all 0 at the
+ end of the map. Decrease the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
+ }
+ break;
+
+
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_open;
+ else
+ goto normal_char;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_close;
+ else
+ goto normal_char;
+
+
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '|':
+ if (syntax & RE_NO_BK_VBAR)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '{':
+ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
+ goto handle_interval;
+ else
+ goto normal_char;
+
+
+ case '\\':
+ if (p == pend) return REG_EESCAPE;
+
+ /* Do not translate the character after the \, so that we can
+ distinguish, e.g., \B from \b, even if we normally would
+ translate, e.g., B to b. */
+ PATFETCH_RAW (c);
+
+ switch (c)
+ {
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto normal_backslash;
+
+ handle_open:
+ bufp->re_nsub++;
+ regnum++;
+
+ if (COMPILE_STACK_FULL)
+ {
+ RETALLOC (compile_stack.stack, compile_stack.size << 1,
+ compile_stack_elt_t);
+ if (compile_stack.stack == NULL) return REG_ESPACE;
+
+ compile_stack.size <<= 1;
+ }
+
+ /* These are the values to restore when we hit end of this
+ group. They are all relative offsets, so that if the
+ whole pattern moves because of realloc, they will still
+ be valid. */
+ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
+ COMPILE_STACK_TOP.fixup_alt_jump
+ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+ COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+ COMPILE_STACK_TOP.regnum = regnum;
+
+ /* We will eventually replace the 0 with the number of
+ groups inner to this one. But do not push a
+ start_memory for groups beyond the last one we can
+ represent in the compiled pattern. */
+ if (regnum <= MAX_REGNUM)
+ {
+ COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
+ BUF_PUSH_3 (start_memory, regnum, 0);
+ }
+
+ compile_stack.avail++;
+
+ fixup_alt_jump = 0;
+ laststart = 0;
+ begalt = b;
+ break;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+
+ if (COMPILE_STACK_EMPTY)
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_backslash;
+ else
+ return REG_ERPAREN;
+
+ handle_close:
+ if (fixup_alt_jump)
+ { /* Push a dummy failure point at the end of the
+ alternative for a possible future
+ `pop_failure_jump' to pop. See comments at
+ `push_dummy_failure' in `re_match_2'. */
+ BUF_PUSH (push_dummy_failure);
+
+ /* We allocated space for this jump when we assigned
+ to `fixup_alt_jump', in the `handle_alt' case below. */
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+ }
+
+ /* See similar code for backslashed left paren above. */
+ if (COMPILE_STACK_EMPTY)
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ return REG_ERPAREN;
+
+ /* Since we just checked for an empty stack above, this
+ ``can't happen''. */
+ assert (compile_stack.avail != 0);
+ {
+ /* We don't just want to restore into `regnum', because
+ later groups should continue to be numbered higher,
+ as in `(ab)c(de)' -- the second group is #2. */
+ regnum_t this_group_regnum;
+
+ compile_stack.avail--;
+ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
+ fixup_alt_jump
+ = COMPILE_STACK_TOP.fixup_alt_jump
+ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
+ : 0;
+ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
+ this_group_regnum = COMPILE_STACK_TOP.regnum;
+
+ /* We're at the end of the group, so now we know how many
+ groups were inside this one. */
+ if (this_group_regnum <= MAX_REGNUM)
+ {
+ unsigned char *inner_group_loc
+ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
+
+ *inner_group_loc = regnum - this_group_regnum;
+ BUF_PUSH_3 (stop_memory, this_group_regnum,
+ regnum - this_group_regnum);
+ }
+ }
+ break;
+
+
+ case '|': /* `\|'. */
+ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
+ goto normal_backslash;
+ handle_alt:
+ if (syntax & RE_LIMITED_OPS)
+ goto normal_char;
+
+ /* Insert before the previous alternative a jump which
+ jumps to this alternative if the former fails. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (on_failure_jump, begalt, b + 6);
+ pending_exact = 0;
+ b += 3;
+
+ /* The alternative before this one has a jump after it
+ which gets executed if it gets matched. Adjust that
+ jump so it will jump to this alternative's analogous
+ jump (put in below, which in turn will jump to the next
+ (if any) alternative's such jump, etc.). The last such
+ jump jumps to the correct final destination. A picture:
+ _____ _____
+ | | | |
+ | v | v
+ a | b | c
+
+ If we are at `b,' then fixup_alt_jump right now points to a
+ three-byte space after `a.' We'll put in the jump, set
+ fixup_alt_jump to right after `b,' and leave behind three
+ bytes which we'll fill in when we get to after `c.' */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ /* Mark and leave space for a jump after this alternative,
+ to be filled in later either by next alternative or
+ when we know we're at the end of a series of alternatives. */
+ fixup_alt_jump = b;
+ GET_BUFFER_SPACE (3);
+ b += 3;
+
+ laststart = 0;
+ begalt = b;
+ break;
+
+
+ case '{':
+ /* If \{ is a literal. */
+ if (!(syntax & RE_INTERVALS)
+ /* If we're at `\{' and it's not the open-interval
+ operator. */
+ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ || (p - 2 == pattern && p == pend))
+ goto normal_backslash;
+
+ handle_interval:
+ {
+ /* If got here, then the syntax allows intervals. */
+
+ /* At least (most) this many matches must be made. */
+ int lower_bound = -1, upper_bound = -1;
+
+ beg_interval = p - 1;
+
+ if (p == pend)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_EBRACE;
+ }
+
+ GET_UNSIGNED_NUMBER (lower_bound);
+
+ if (c == ',')
+ {
+ GET_UNSIGNED_NUMBER (upper_bound);
+ if (upper_bound < 0) upper_bound = RE_DUP_MAX;
+ }
+ else
+ /* Interval such as `{1}' => match exactly once. */
+ upper_bound = lower_bound;
+
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_BADBR;
+ }
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (c != '\\') return REG_EBRACE;
+
+ PATFETCH (c);
+ }
+
+ if (c != '}')
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_BADBR;
+ }
+
+ /* We just parsed a valid interval. */
+
+ /* If it's invalid to have no preceding re. */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ laststart = b;
+ else
+ goto unfetch_interval;
+ }
+
+ /* If the upper bound is zero, don't want to succeed at
+ all; jump from `laststart' to `b + 3', which will be
+ the end of the buffer after we insert the jump. */
+ if (upper_bound == 0)
+ {
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (jump, laststart, b + 3);
+ b += 3;
+ }
+
+ /* Otherwise, we have a nontrivial interval. When
+ we're all done, the pattern will look like:
+ set_number_at <jump count> <upper bound>
+ set_number_at <succeed_n count> <lower bound>
+ succeed_n <after jump addr> <succeed_n count>
+ <body of loop>
+ jump_n <succeed_n addr> <jump count>
+ (The upper bound and `jump_n' are omitted if
+ `upper_bound' is 1, though.) */
+ else
+ { /* If the upper bound is > 1, we need to insert
+ more at the end of the loop. */
+ unsigned nbytes = 10 + (upper_bound > 1) * 10;
+
+ GET_BUFFER_SPACE (nbytes);
+
+ /* Initialize lower bound of the `succeed_n', even
+ though it will be set during matching by its
+ attendant `set_number_at' (inserted next),
+ because `re_compile_fastmap' needs to know.
+ Jump to the `jump_n' we might insert below. */
+ INSERT_JUMP2 (succeed_n, laststart,
+ b + 5 + (upper_bound > 1) * 5,
+ lower_bound);
+ b += 5;
+
+ /* Code to initialize the lower bound. Insert
+ before the `succeed_n'. The `5' is the last two
+ bytes of this `set_number_at', plus 3 bytes of
+ the following `succeed_n'. */
+ insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+ b += 5;
+
+ if (upper_bound > 1)
+ { /* More than one repetition is allowed, so
+ append a backward jump to the `succeed_n'
+ that starts this interval.
+
+ When we've reached this during matching,
+ we'll have matched the interval once, so
+ jump back only `upper_bound - 1' times. */
+ STORE_JUMP2 (jump_n, b, laststart + 5,
+ upper_bound - 1);
+ b += 5;
+
+ /* The location we want to set is the second
+ parameter of the `jump_n'; that is `b-2' as
+ an absolute address. `laststart' will be
+ the `set_number_at' we're about to insert;
+ `laststart+3' the number to set, the source
+ for the relative address. But we are
+ inserting into the middle of the pattern --
+ so everything is getting moved up by 5.
+ Conclusion: (b - 2) - (laststart + 3) + 5,
+ i.e., b - laststart.
+
+ We insert this at the beginning of the loop
+ so that if we fail during matching, we'll
+ reinitialize the bounds. */
+ insert_op2 (set_number_at, laststart, b - laststart,
+ upper_bound - 1, b);
+ b += 5;
+ }
+ }
+ pending_exact = 0;
+ beg_interval = NULL;
+ }
+ break;
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ assert (beg_interval);
+ p = beg_interval;
+ beg_interval = NULL;
+
+ /* normal_char and normal_backslash need `c'. */
+ PATFETCH (c);
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (p > pattern && p[-1] == '\\')
+ goto normal_backslash;
+ }
+ goto normal_char;
+
+#ifdef emacs
+ /* There is no way to specify the before_dot and after_dot
+ operators. rms says this is ok. --karl */
+ case '=':
+ BUF_PUSH (at_dot);
+ break;
+
+ case 's':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+ break;
+
+ case 'S':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+ break;
+#endif /* emacs */
+
+
+ case 'w':
+ laststart = b;
+ BUF_PUSH (wordchar);
+ break;
+
+
+ case 'W':
+ laststart = b;
+ BUF_PUSH (notwordchar);
+ break;
+
+
+ case '<':
+ BUF_PUSH (wordbeg);
+ break;
+
+ case '>':
+ BUF_PUSH (wordend);
+ break;
+
+ case 'b':
+ BUF_PUSH (wordbound);
+ break;
+
+ case 'B':
+ BUF_PUSH (notwordbound);
+ break;
+
+ case '`':
+ BUF_PUSH (begbuf);
+ break;
+
+ case '\'':
+ BUF_PUSH (endbuf);
+ break;
+
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_char;
+
+ c1 = c - '0';
+
+ if (c1 > regnum)
+ return REG_ESUBREG;
+
+ /* Can't back reference to a subexpression if inside of it. */
+ if (group_in_compile_stack (compile_stack, c1))
+ goto normal_char;
+
+ laststart = b;
+ BUF_PUSH_2 (duplicate, c1);
+ break;
+
+
+ case '+':
+ case '?':
+ if (syntax & RE_BK_PLUS_QM)
+ goto handle_plus;
+ else
+ goto normal_backslash;
+
+ default:
+ normal_backslash:
+ /* You might think it would be useful for \ to mean
+ not to translate; but if we don't translate it
+ it will never match anything. */
+ c = TRANSLATE (c);
+ goto normal_char;
+ }
+ break;
+
+
+ default:
+ /* Expects the character in `c'. */
+ normal_char:
+ /* If no exactn currently being built. */
+ if (!pending_exact
+
+ /* If last exactn not at current position. */
+ || pending_exact + *pending_exact + 1 != b
+
+ /* We have only one byte following the exactn for the count. */
+ || *pending_exact == (1 << BYTEWIDTH) - 1
+
+ /* If followed by a repetition operator. */
+ || *p == '*' || *p == '^'
+ || ((syntax & RE_BK_PLUS_QM)
+ ? *p == '\\' && (p[1] == '+' || p[1] == '?')
+ : (*p == '+' || *p == '?'))
+ || ((syntax & RE_INTERVALS)
+ && ((syntax & RE_NO_BK_BRACES)
+ ? *p == '{'
+ : (p[0] == '\\' && p[1] == '{'))))
+ {
+ /* Start building a new exactn. */
+
+ laststart = b;
+
+ BUF_PUSH_2 (exactn, 0);
+ pending_exact = b - 1;
+ }
+
+ BUF_PUSH (c);
+ (*pending_exact)++;
+ break;
+ } /* switch (c) */
+ } /* while p != pend */
+
+
+ /* Through the pattern now. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ if (!COMPILE_STACK_EMPTY)
+ return REG_EPAREN;
+
+ free (compile_stack.stack);
+
+ /* We have succeeded; set the length of the buffer. */
+ bufp->used = b - bufp->buffer;
+
+#ifdef DEBUG
+ if (debug)
+ {
+ DEBUG_PRINT1 ("\nCompiled pattern: ");
+ print_compiled_pattern (bufp);
+ }
+#endif /* DEBUG */
+
+ return REG_NOERROR;
+} /* regex_compile */
+
+/* Subroutines for `regex_compile'. */
+
+/* Write the opcode OP into the byte at LOC, then encode ARG as the
+   two-byte number that immediately follows it.  */
+
+static void
+store_op1 (op, loc, arg)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg;
+{
+  unsigned char *operand = loc + 1;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (operand, arg);
+}
+
+
+/* Two-operand variant of `store_op1': write the opcode OP at LOC, then
+   the two-byte number ARG1 at LOC + 1 and the two-byte number ARG2 at
+   LOC + 3.  */
+
+static void
+store_op2 (op, loc, arg1, arg2)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg1, arg2;
+{
+  unsigned char *first_operand = loc + 1;
+  unsigned char *second_operand = loc + 3;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (first_operand, arg1);
+  STORE_NUMBER (second_operand, arg2);
+}
+
+
+/* Open a three-byte gap at LOC by moving every byte in [LOC, END) up by
+   three positions, then fill the gap with OP followed by its two-byte
+   integer parameter ARG.  The caller must already have room for the
+   three extra bytes past END.  */
+
+static void
+insert_op1 (op, loc, arg, end)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg;
+     unsigned char *end;
+{
+  register unsigned char *src;
+
+  /* Source and destination overlap, so copy from the top down.  */
+  for (src = end; src != loc; src--)
+    src[2] = src[-1];
+
+  store_op1 (op, loc, arg);
+}
+
+
+/* Two-operand variant of `insert_op1': open a five-byte gap at LOC by
+   moving every byte in [LOC, END) up by five positions, then fill the
+   gap with OP followed by the two-byte parameters ARG1 and ARG2.  */
+
+static void
+insert_op2 (op, loc, arg1, arg2, end)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg1, arg2;
+     unsigned char *end;
+{
+  register unsigned char *src;
+
+  /* Source and destination overlap, so copy from the top down.  */
+  for (src = end; src != loc; src--)
+    src[4] = src[-1];
+
+  store_op2 (op, loc, arg1, arg2);
+}
+
+
+/* P points just past a `^' in PATTERN.  Return true if the character
+   before that `^' opens a subexpression or separates alternatives under
+   SYNTAX: a `(' or `|' counts either when the syntax treats it as an
+   operator unescaped (RE_NO_BK_PARENS / RE_NO_BK_VBAR) or when it is
+   preceded by a backslash.  We assume there is at least one character
+   before the `^'.  */
+
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+     const char *pattern, *p;
+     reg_syntax_t syntax;
+{
+  const char *before_hat = p - 2;
+  boolean escaped = before_hat > pattern && before_hat[-1] == '\\';
+
+  /* After a subexpression?  */
+  if (*before_hat == '(')
+    return syntax & RE_NO_BK_PARENS || escaped;
+
+  /* After an alternative?  */
+  if (*before_hat == '|')
+    return syntax & RE_NO_BK_VBAR || escaped;
+
+  return false;
+}
+
+
+/* The dual of at_begline_loc_p.  This one is for $.  P points just past
+   the `$'; return true if what follows closes a subexpression or
+   separates alternatives under SYNTAX: a `)' or `|' when the syntax
+   treats it as an operator unescaped (RE_NO_BK_PARENS / RE_NO_BK_VBAR),
+   otherwise the two-character sequences `\)' or `\|'.  We assume there
+   is at least one character after the $, i.e., `P < PEND'.
+
+   SYNTAX is declared as `reg_syntax_t', matching at_begline_loc_p and
+   compile_range and the type of the value the caller passes.  */
+
+static boolean
+at_endline_loc_p (p, pend, syntax)
+     const char *p, *pend;
+     reg_syntax_t syntax;
+{
+  const char *next = p;
+  boolean next_backslash = *next == '\\';
+  /* Only look at the character after NEXT if it is inside the pattern.  */
+  const char *next_next = p + 1 < pend ? p + 1 : NULL;
+
+  return
+    /* Before a subexpression?  */
+    (syntax & RE_NO_BK_PARENS ? *next == ')'
+     : next_backslash && next_next && *next_next == ')')
+    /* Before an alternative?  */
+    || (syntax & RE_NO_BK_VBAR ? *next == '|'
+        : next_backslash && next_next && *next_next == '|');
+}
+
+
+/* Search the entries of COMPILE_STACK that are in use (indices below
+   its `avail'), newest first, for one whose group number is REGNUM.
+   Return true if one is found and false otherwise.  */
+
+static boolean
+group_in_compile_stack (compile_stack, regnum)
+     compile_stack_type compile_stack;
+     regnum_t regnum;
+{
+  int idx = compile_stack.avail;
+
+  while (--idx >= 0)
+    {
+      if (compile_stack.stack[idx].regnum == regnum)
+        return true;
+    }
+
+  return false;
+}
+
+
+/* Read the ending character of a range (in a bracket expression) from the
+   uncompiled pattern *P_PTR (which ends at PEND).  We assume the
+   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
+   Then we set the translation of all bits between the starting and
+   ending characters (inclusive) in the compiled pattern B.
+
+   Both endpoints are read from the pattern untranslated; translation
+   is applied once, to each member of the range, when its bit is set.
+   Fetching a translated end while the start is raw would compare
+   endpoints from two different character sets: with a case-folding
+   table, `[A-Z]' would turn into the span from `A' to `z'.
+
+   On return *P_PTR has been advanced past the ending character.
+
+   Return an error code: REG_ERANGE if the pattern ends before the range
+   end, or if the range is empty and SYNTAX has RE_NO_EMPTY_RANGES set;
+   REG_NOERROR otherwise.
+
+   We use these short variable names so we can use the same macros as
+   `regex_compile' itself.  */
+
+static reg_errcode_t
+compile_range (p_ptr, pend, translate, syntax, b)
+     const char **p_ptr, *pend;
+     char *translate;
+     reg_syntax_t syntax;
+     unsigned char *b;
+{
+  unsigned this_char;
+
+  const char *p = *p_ptr;
+
+  /* Even though the pattern is a signed `char *', we need to fetch with
+     `unsigned char's.  Reason: if the high bit of the pattern character
+     is set, the range endpoints will be negative if we fetch using a
+     signed `char *'.  */
+  unsigned char range_start, range_end;
+
+  if (p == pend)
+    return REG_ERANGE;
+
+  /* Fetch the endpoints without translating them; the appropriate
+     translation is done in the bit-setting loop below.  */
+  range_start = ((const unsigned char *) p)[-2];
+  range_end = ((const unsigned char *) p)[0];
+
+  /* Have to increment the pointer into the pattern string, so the
+     caller isn't still at the ending character.  */
+  (*p_ptr)++;
+
+  /* If the start is after the end, the range is empty.  */
+  if (range_start > range_end)
+    return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
+
+  /* Here we see why `this_char' has to be larger than an `unsigned
+     char' -- the range is inclusive, so if `range_end' == 0xff
+     (assuming 8-bit characters), we would otherwise go into an infinite
+     loop, since all characters <= 0xff.  */
+  for (this_char = range_start; this_char <= range_end; this_char++)
+    {
+      SET_LIST_BIT (TRANSLATE (this_char));
+    }
+
+  return REG_NOERROR;
+}
+
+/* Failure stack declarations and macros; both re_compile_fastmap and
+   re_match_2 use a failure stack.  These have to be macros because of
+   REGEX_ALLOCATE.  The FAIL_STACK_* predicates below refer to a variable
+   named `fail_stack' (or a pointer named `fail_stack_ptr') that must be
+   in scope where they are expanded.  */
+
+
+/* Number of failure points for which to initially allocate space
+   when matching.  If this number is exceeded, we allocate more
+   space, so it is not a hard limit.  The #ifndef lets a build supply
+   its own value.  NOTE(review): INIT_FAIL_STACK uses this value as a
+   count of stack slots, not of whole failure points.  */
+#ifndef INIT_FAILURE_ALLOC
+#define INIT_FAILURE_ALLOC 5
+#endif
+
+/* Roughly the maximum number of failure points on the stack.  Would be
+   exactly that if we always used MAX_FAILURE_ITEMS slots each time we
+   failed; DOUBLE_FAIL_STACK refuses to grow the stack once its size
+   exceeds re_max_failures * MAX_FAILURE_ITEMS slots.
+   This is a variable only so users of regex can assign to it; we never
+   change it ourselves.  */
+int re_max_failures = 2000;
+
+typedef const unsigned char *fail_stack_elt_t; /* One stack slot.  */
+
+typedef struct
+{
+ fail_stack_elt_t *stack; /* The array of slots.  */
+ unsigned size; /* Number of slots allocated in `stack'.  */
+ unsigned avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) /* No slot is in use.  */
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) /* Same test, through `fail_stack_ptr'.  */
+#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) /* No free slot is left.  */
+#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail]) /* The slot at `avail', i.e. the next open position.  */
+
+
+/* Initialize `fail_stack': allocate INIT_FAILURE_ALLOC slots with
+   REGEX_ALLOCATE, record that size, and mark the stack empty.
+   Do `return -2' if the alloc fails, so this may only be expanded
+   inside a function for which that return statement is valid.
+   Assumes a variable named `fail_stack' is in scope.  */
+
+#define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.stack = (fail_stack_elt_t *) \
+ REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ \
+ if (fail_stack.stack == NULL) \
+ return -2; \
+ \
+ fail_stack.size = INIT_FAILURE_ALLOC; \
+ fail_stack.avail = 0; \
+ } while (0)
+
+
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures'
+   failure points: the stack is not grown once its size already exceeds
+   re_max_failures * MAX_FAILURE_ITEMS slots.
+
+   Return 1 if succeeds, and 0 if either ran out of memory
+   allocating space for it or it was already too large.
+   Note that the result of REGEX_REALLOCATE is assigned to
+   (FAIL_STACK).stack before it is tested, so after an allocation
+   failure that member is NULL; `size' is left unchanged in that case.
+
+   REGEX_REALLOCATE requires `destination' be declared.
+   MAX_FAILURE_ITEMS refers to `num_regs', which must also be in scope.  */
+
+#define DOUBLE_FAIL_STACK(fail_stack) \
+ ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
+ ? 0 \
+ : ((fail_stack).stack = (fail_stack_elt_t *) \
+ REGEX_REALLOCATE ((fail_stack).stack, \
+ (fail_stack).size * sizeof (fail_stack_elt_t), \
+ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
+ \
+ (fail_stack).stack == NULL \
+ ? 0 \
+ : ((fail_stack).size <<= 1, \
+ 1)))
+
+
+/* Push PATTERN_OP on FAIL_STACK, doubling the stack first if it is full.
+
+   Return 1 if was able to do so and 0 if ran out of memory allocating
+   space to do so.
+
+   The fullness test goes through FAIL_STACK_FULL, which always looks at
+   the variable named `fail_stack', whatever the FAIL_STACK argument
+   is.  */
+#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
+ ((FAIL_STACK_FULL () \
+ && !DOUBLE_FAIL_STACK (fail_stack)) \
+ ? 0 \
+ : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
+ 1))
+
+
+/* This pushes an item onto the failure stack.  Must be a four-byte
+   value.  (NOTE(review): the item is simply cast to `fail_stack_elt_t',
+   a pointer type; "four-byte" presumably reflects the pointer size on
+   the original targets -- confirm.)  Assumes the variable `fail_stack'.
+   Does no bounds checking, so space must already have been ensured.
+   Probably should only be called from within `PUSH_FAILURE_POINT'.  */
+#define PUSH_FAILURE_ITEM(item) \
+ fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
+
+/* The complement operation: decrement `avail' and yield the slot it
+   then designates.  Assumes `fail_stack' is nonempty.  */
+#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
+
+/* Used to omit pushing failure point id's when we're not debugging.
+   With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_ITEM and DEBUG_POP
+   stores the popped item through ITEM_ADDR; otherwise both expand to
+   nothing.  */
+#ifdef DEBUG
+#define DEBUG_PUSH PUSH_FAILURE_ITEM
+#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
+#else
+#define DEBUG_PUSH(item)
+#define DEBUG_POP(item_addr)
+#endif
+
+
+/* Push the information about the state we will need
+   if we ever fail back to it.
+
+   The items are pushed in this order: for each register from
+   lowest_active_reg up to highest_active_reg, its regstart, its regend
+   and its reg_info word; then lowest_active_reg, highest_active_reg,
+   PATTERN_PLACE and STRING_PLACE; and finally, only when DEBUG is
+   defined, the failure id.  Before pushing, the stack is doubled until
+   at least NUM_FAILURE_ITEMS slots are free.
+
+   Requires variables fail_stack, regstart, regend, reg_info,
+   lowest_active_reg, highest_active_reg, and num_regs be declared.
+   DOUBLE_FAIL_STACK requires `destination' be declared, which this
+   macro does itself.
+
+   Does `return FAILURE_CODE' if runs out of memory.  */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
+ do { \
+ char *destination; \
+ /* Must be int, so when we don't save any registers, the arithmetic \
+ of 0 + -1 isn't done as unsigned. */ \
+ int this_reg; \
+ \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+ \
+ DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
+ \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
+ (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+ } \
+ \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
+ \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
+ \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ PUSH_FAILURE_ITEM (regstart[this_reg]); \
+ \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ PUSH_FAILURE_ITEM (regend[this_reg]); \
+ \
+ DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
+ } \
+ \
+ DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
+ PUSH_FAILURE_ITEM (lowest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
+ PUSH_FAILURE_ITEM (highest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_ITEM (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_ITEM (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+ } while (0)
+
+/* This is the number of items that are pushed and popped on the stack
+   for each register: its regstart, its regend, and its reg_info word
+   (see PUSH_FAILURE_POINT).  */
+#define NUM_REG_ITEMS 3
+
+/* Individual items aside from the registers: the lowest and highest
+   active register numbers, the pattern position and the string
+   position, plus the failure point id when debugging.  */
+#ifdef DEBUG
+#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
+#else
+#define NUM_NONREG_ITEMS 4
+#endif
+
+/* We push at most this many items on the stack.  Refers to the
+   variable `num_regs', which must be in scope at the point of use.  */
+#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+/* We actually push this many items.  Refers to the variables
+   `highest_active_reg' and `lowest_active_reg'.  */
+#define NUM_FAILURE_ITEMS \
+ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
+ + NUM_NONREG_ITEMS)
+
+/* How many items can still be added to the stack without overflowing it. */
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAILURE_POINT pushes.
+
+ We restore into the parameters, all of which should be lvalues:
+ STR -- the saved data position.
+ PAT -- the saved pattern position.
+ LOW_REG, HIGH_REG -- the lowest and highest active registers.
+ REGSTART, REGEND -- arrays of string positions.
+ REG_INFO -- array of information about each subexpression.
+
+ Also assumes the variables `fail_stack' and (if debugging), `bufp',
+ `pend', `string1', `size1', `string2', and `size2'. */
+
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{ \
+ DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
+ int this_reg; \
+ const unsigned char *string_temp; \
+ \
+ assert (!FAIL_STACK_EMPTY ()); \
+ \
+ /* Remove failure points and point to how many regs pushed. */ \
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ \
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
+ \
+ DEBUG_POP (&failure_id); \
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
+ \
+ /* If the saved string location is NULL, it came from an \
+ on_failure_keep_string_jump opcode, and we want to throw away the \
+ saved NULL, thus retaining our current position in the string. */ \
+ string_temp = POP_FAILURE_ITEM (); \
+ if (string_temp != NULL) \
+ str = (const char *) string_temp; \
+ \
+ DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ \
+ pat = (unsigned char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
+ \
+ /* Restore register info. */ \
+ high_reg = (unsigned) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
+ \
+ low_reg = (unsigned) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
+ \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
+ \
+ reg_info[this_reg].word = POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
+ \
+ regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ \
+ regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ } \
+} /* POP_FAILURE_POINT */
+
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
+   characters can start a string that matches the pattern.  This fastmap
+   is used by re_search to skip quickly over impossible starting points.
+
+   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+   area as BUFP->fastmap.
+
+   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+   the pattern buffer.
+
+   The pattern is walked one path at a time.  Each `on_failure_jump'
+   style opcode pushes its alternative target on a local failure stack.
+   A path ends either at the first opcode that must consume a character
+   (the characters it accepts are added to the fastmap) or at the end of
+   the pattern (in which case the pattern can match the empty string).
+   Entries are only ever added by a path; no path may remove an entry
+   that another path has set.
+
+   Returns 0 if we succeed, -2 if an internal error (the failure stack
+   could not be grown).  */
+
+int
+re_compile_fastmap (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  int j, k;
+  fail_stack_type fail_stack;
+#ifndef REGEX_MALLOC
+  char *destination;
+#endif
+  /* We don't push any register information onto the failure stack.  */
+  unsigned num_regs = 0;
+
+  register char *fastmap = bufp->fastmap;
+  unsigned char *pattern = bufp->buffer;
+  unsigned long size = bufp->used;
+  const unsigned char *p = pattern;
+  register unsigned char *pend = pattern + size;
+
+  /* Assume that each path through the pattern can be null until
+     proven otherwise.  We set this false at the bottom of switch
+     statement, to which we get only if a particular path doesn't
+     match the empty string.  */
+  boolean path_can_be_null = true;
+
+  /* We aren't doing a `succeed_n' to begin with.  */
+  boolean succeed_n_p = false;
+
+  assert (fastmap != NULL && p != NULL);
+
+  INIT_FAIL_STACK ();
+  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
+  bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
+  bufp->can_be_null = 0;
+
+  while (p != pend || !FAIL_STACK_EMPTY ())
+    {
+      if (p == pend)
+        {
+          bufp->can_be_null |= path_can_be_null;
+
+          /* Reset for next path.  */
+          path_can_be_null = true;
+
+          p = fail_stack.stack[--fail_stack.avail];
+        }
+
+      /* We should never be about to go beyond the end of the pattern.  */
+      assert (p < pend);
+
+#ifdef SWITCH_ENUM_BUG
+      switch ((int) ((re_opcode_t) *p++))
+#else
+      switch ((re_opcode_t) *p++)
+#endif
+        {
+
+        /* I guess the idea here is to simply not bother with a fastmap
+           if a backreference is used, since it's too hard to figure out
+           the fastmap for the corresponding group.  Setting
+           `can_be_null' stops `re_search_2' from using the fastmap, so
+           that is all we do.  */
+        case duplicate:
+          bufp->can_be_null = 1;
+          return 0;
+
+
+        /* Following are the cases which match a character.  These end
+           with `break'.  */
+
+        case exactn:
+          /* p[0] is the count; p[1] is the first literal character.  */
+          fastmap[p[1]] = 1;
+          break;
+
+
+        case charset:
+          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+              fastmap[j] = 1;
+          break;
+
+
+        case charset_not:
+          /* Chars beyond end of map must be allowed.  */
+          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+            fastmap[j] = 1;
+
+          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+              fastmap[j] = 1;
+          break;
+
+
+        case wordchar:
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) == Sword)
+              fastmap[j] = 1;
+          break;
+
+
+        case notwordchar:
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) != Sword)
+              fastmap[j] = 1;
+          break;
+
+
+        case anychar:
+          {
+            /* An earlier path may already have established that a
+               newline can start a match (for instance a literal
+               newline alternated with `.').  Remember that entry so
+               that excluding newline for `.' below does not erase
+               it.  */
+            char newline_entry = fastmap['\n'];
+
+            /* `.' matches anything ...  */
+            for (j = 0; j < (1 << BYTEWIDTH); j++)
+              fastmap[j] = 1;
+
+            /* ... except perhaps newline.  */
+            if (!(bufp->syntax & RE_DOT_NEWLINE))
+              fastmap['\n'] = newline_entry;
+
+            /* Return if we have already set `can_be_null'; if we have,
+               then the fastmap is irrelevant.  Something's wrong here.  */
+            else if (bufp->can_be_null)
+              return 0;
+
+            /* Otherwise, have to check alternative paths.  */
+            break;
+          }
+
+
+#ifdef emacs
+        case syntaxspec:
+          k = *p++;
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) == (enum syntaxcode) k)
+              fastmap[j] = 1;
+          break;
+
+
+        case notsyntaxspec:
+          k = *p++;
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) != (enum syntaxcode) k)
+              fastmap[j] = 1;
+          break;
+
+
+        /* All cases after this match the empty string.  These end with
+           `continue'.  */
+
+
+        case before_dot:
+        case at_dot:
+        case after_dot:
+          continue;
+#endif /* emacs */
+
+
+        /* These opcodes consume no characters and take no operands.  */
+        case no_op:
+        case begline:
+        case endline:
+        case begbuf:
+        case endbuf:
+        case wordbound:
+        case notwordbound:
+        case wordbeg:
+        case wordend:
+        case push_dummy_failure:
+          continue;
+
+
+        case jump_n:
+        case pop_failure_jump:
+        case maybe_pop_jump:
+        case jump:
+        case jump_past_alt:
+        case dummy_failure_jump:
+          EXTRACT_NUMBER_AND_INCR (j, p);
+          p += j;
+          if (j > 0)
+            continue;
+
+          /* Jump backward implies we just went through the body of a
+             loop and matched nothing.  Opcode jumped to should be
+             `on_failure_jump' or `succeed_n'.  Just treat it like an
+             ordinary jump.  For a * loop, it has pushed its failure
+             point already; if so, discard that as redundant.  */
+          if ((re_opcode_t) *p != on_failure_jump
+              && (re_opcode_t) *p != succeed_n)
+            continue;
+
+          p++;
+          EXTRACT_NUMBER_AND_INCR (j, p);
+          p += j;
+
+          /* If what's on the stack is where we are now, pop it.  */
+          if (!FAIL_STACK_EMPTY ()
+              && fail_stack.stack[fail_stack.avail - 1] == p)
+            fail_stack.avail--;
+
+          continue;
+
+
+        case on_failure_jump:
+        case on_failure_keep_string_jump:
+        handle_on_failure_jump:
+          EXTRACT_NUMBER_AND_INCR (j, p);
+
+          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
+             end of the pattern.  We don't want to push such a point,
+             since when we restore it above, entering the switch will
+             increment `p' past the end of the pattern.  We don't need
+             to push such a point since we obviously won't find any more
+             fastmap entries beyond `pend'.  Such a pattern can match
+             the null string, though.  */
+          if (p + j < pend)
+            {
+              if (!PUSH_PATTERN_OP (p + j, fail_stack))
+                return -2;
+            }
+          else
+            bufp->can_be_null = 1;
+
+          if (succeed_n_p)
+            {
+              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
+              succeed_n_p = false;
+            }
+
+          continue;
+
+
+        case succeed_n:
+          /* Get to the number of times to succeed.  */
+          p += 2;
+
+          /* Increment p past the n for when k != 0.  */
+          EXTRACT_NUMBER_AND_INCR (k, p);
+          if (k == 0)
+            {
+              /* Back up over both two-byte operands and handle this
+                 opcode as an `on_failure_jump'.  */
+              p -= 4;
+              succeed_n_p = true;  /* Spaghetti code alert.  */
+              goto handle_on_failure_jump;
+            }
+          continue;
+
+
+        case set_number_at:
+          p += 4;
+          continue;
+
+
+        case start_memory:
+        case stop_memory:
+          p += 2;
+          continue;
+
+
+        default:
+          abort (); /* We have listed all the cases.  */
+        } /* switch *p++ */
+
+      /* Getting here means we have found the possible starting
+         characters for one path of the pattern -- and that the empty
+         string does not match.  We need not follow this path further.
+         Instead, look at the next alternative (remembered on the
+         stack), or quit if no more.  The test at the top of the loop
+         does these things.  */
+      path_can_be_null = false;
+      p = pend;
+    } /* while p */
+
+  /* Set `can_be_null' for the last path (also the first path, if the
+     pattern is empty).  */
+  bufp->can_be_null |= path_can_be_null;
+  return 0;
+} /* re_compile_fastmap */
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
+   this memory for recording register information.  STARTS and ENDS
+   must be allocated using the malloc library routine, and must each
+   be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data: BUFP is marked REGS_UNALLOCATED and REGS is emptied
+   (zero registers, null `start' and `end'); STARTS and ENDS are
+   ignored in that case.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+     struct re_pattern_buffer *bufp;
+     struct re_registers *regs;
+     unsigned num_regs;
+     regoff_t *starts, *ends;
+{
+  if (num_regs)
+    {
+      /* The caller's arrays are malloc'd, so the matcher may grow
+         them later.  */
+      bufp->regs_allocated = REGS_REALLOCATE;
+      regs->num_regs = num_regs;
+      regs->start = starts;
+      regs->end = ends;
+    }
+  else
+    {
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      /* `start' and `end' are pointers to regoff_t, so the null value
+         is a null pointer of that type, not a regoff_t zero.  */
+      regs->start = regs->end = (regoff_t *) 0;
+    }
+}
+
+/* Searching routines. */
+
+/* Single-string front end to re_search_2 (below): search STRING, of
+   length SIZE, starting at STARTPOS and scanning over RANGE.  The
+   caller cannot say where to stop matching; matching may always run to
+   the end of STRING.  The result is whatever re_search_2 returns.  */
+
+int
+re_search (bufp, string, size, startpos, range, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size;
+     int startpos;
+     int range;
+     struct re_registers *regs;
+{
+  int found;
+
+  /* There is no first string; STRING is handed over as the second
+     string, and the stop position is its full length.  */
+  found = re_search_2 (bufp, NULL, 0, string, size, startpos, range,
+                       regs, size);
+  return found;
+}
+
+
+/* Using the compiled pattern in BUFP->buffer, first tries to match the
+   virtual concatenation of STRING1 and STRING2, starting first at index
+   STARTPOS, then at STARTPOS + 1, and so on.
+
+   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+
+   RANGE is how far to scan while trying to match.  RANGE = 0 means try
+   only at STARTPOS; in general, the last start tried is STARTPOS +
+   RANGE.  A negative RANGE searches backwards: the start positions
+   tried are STARTPOS, STARTPOS - 1, and so on.  RANGE is clipped so
+   that no start position lies outside the virtual concatenation.
+
+   In REGS, return the indices of the virtual concatenation of STRING1
+   and STRING2 that matched the entire BUFP->buffer and its contained
+   subexpressions.
+
+   Do not consider matching one past the index STOP in the virtual
+   concatenation of STRING1 and STRING2.
+
+   If BUFP has a fastmap that is not yet accurate, it is recomputed
+   first.
+
+   We return either the position in the strings at which the match was
+   found, -1 if no match, or -2 if error (such as failure
+   stack overflow).  */
+
+int
+re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int size1, size2;
+     int startpos;
+     int range;
+     struct re_registers *regs;
+     int stop;
+{
+  int val;
+  register char *fastmap = bufp->fastmap;
+  register char *translate = bufp->translate;
+  int total_size = size1 + size2;
+  int endpos = startpos + range;
+
+  /* Check for out-of-range STARTPOS.  */
+  if (startpos < 0 || startpos > total_size)
+    return -1;
+
+  /* Fix up RANGE if it might eventually take us outside
+     the virtual concatenation of STRING1 and STRING2.  */
+  if (endpos < -1)
+    range = -1 - startpos;
+  else if (endpos > total_size)
+    range = total_size - startpos;
+
+  /* Update the fastmap now if not correct already.  */
+  if (fastmap && !bufp->fastmap_accurate)
+    if (re_compile_fastmap (bufp) == -2)
+      return -2;
+
+  /* If the search isn't to be a backwards one, don't waste time in a
+     long search for a pattern that says it is anchored.  */
+  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf
+      && range > 0)
+    {
+      if (startpos > 0)
+        return -1;
+      else
+        range = 1;
+    }
+
+  for (;;)
+    {
+      /* If a fastmap is supplied, skip quickly over characters that
+         cannot be the start of a match.  If the pattern can match the
+         null string, however, we don't need to skip characters; we want
+         the first null string.  */
+      if (fastmap && startpos < total_size && !bufp->can_be_null)
+        {
+          if (range > 0)	/* Searching forwards.  */
+            {
+              register const char *d;
+              register int lim = 0;
+              int irange = range;
+
+              /* If the scan would cross from string1 into string2,
+                 stop it at the end of string1; LIM is the part of
+                 RANGE that lies beyond that point.  */
+              if (startpos < size1 && startpos + range >= size1)
+                lim = range - (size1 - startpos);
+
+              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
+
+              /* Written out as an if-else to avoid testing `translate'
+                 inside the loop.  The data bytes are converted to
+                 `unsigned char' before being used as indices: plain
+                 `char' may be signed, and a byte with the high bit set
+                 would otherwise index before the start of the table.  */
+              if (translate)
+                while (range > lim
+                       && !fastmap[(unsigned char)
+                                   translate[(unsigned char) *d++]])
+                  range--;
+              else
+                while (range > lim && !fastmap[(unsigned char) *d++])
+                  range--;
+
+              startpos += irange - range;
+            }
+          else				/* Searching backwards.  */
+            {
+              register char c = (size1 == 0 || startpos >= size1
+                                 ? string2[startpos - size1]
+                                 : string1[startpos]);
+
+              /* Same precaution as above: never index the fastmap
+                 with a possibly negative `char'.  */
+              if (!fastmap[(unsigned char) TRANSLATE (c)])
+                goto advance;
+            }
+        }
+
+      /* If can't match the null string, and that's all we have left, fail.  */
+      if (range >= 0 && startpos == total_size && fastmap
+          && !bufp->can_be_null)
+        return -1;
+
+      val = re_match_2 (bufp, string1, size1, string2, size2,
+                        startpos, regs, stop);
+      if (val >= 0)
+        return startpos;
+
+      if (val == -2)
+        return -2;
+
+    advance:
+      if (!range)
+        break;
+      else if (range > 0)
+        {
+          range--;
+          startpos++;
+        }
+      else
+        {
+          range++;
+          startpos--;
+        }
+    }
+  return -1;
+} /* re_search_2 */
+
+/* Declarations and macros for re_match_2. */
+
+static int bcmp_translate ();
+static boolean alt_match_null_string_p (),
+ common_op_match_null_string_p (),
+ group_match_null_string_p ();
+
+/* Structure for per-register (a.k.a. per-group) information.
+ This must not be longer than one word, because we push this value
+ onto the failure stack. Other register information, such as the
+ starting and ending positions (which are addresses), and the list of
+ inner groups (which is a bits list) are maintained in separate
+ variables.
+
+ The `word' member is what is actually pushed on and popped from the
+ failure stack; the `bits' member is the view used everywhere else.
+
+ We are making a (strictly speaking) nonportable assumption here: that
+ the compiler will pack our bit fields into something that fits into
+ the type of `word', i.e., is something that fits into one item on the
+ failure stack. */
+typedef union
+{
+ fail_stack_elt_t word; /* Whole record as one failure-stack item. */
+ struct
+ {
+ /* This field is one if this group can match the empty string,
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
+#define MATCH_NULL_UNSET_VALUE 3
+ unsigned match_null_string_p : 2; /* 0, 1, or MATCH_NULL_UNSET_VALUE. */
+ unsigned is_active : 1; /* Are we currently inside this group? */
+ unsigned matched_something : 1; /* Matched text this time through? */
+ unsigned ever_matched_something : 1; /* Has it ever matched text? */
+ } bits;
+} register_info_type;
+
+#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) /* Accessors for R's bit fields; usable as assignment targets. */
+#define IS_ACTIVE(R) ((R).bits.is_active)
+#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
+#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
+
+
+/* Invoke after some text has been matched.  For every register from
+   `lowest_active_reg' through `highest_active_reg' (the groups we are
+   currently inside) this sets both the `matched_something' flag and the
+   `ever_matched_something' flag in `reg_info'.  The information is only
+   consulted at `stop_memory', and then only for the previous time
+   through the loop (if the group is starred or whatever).  */
+#define SET_REGS_MATCHED() \
+  do \
+    { \
+      unsigned regnum = lowest_active_reg; \
+      while (regnum <= highest_active_reg) \
+        { \
+          EVER_MATCHED_SOMETHING (reg_info[regnum]) = 1; \
+          MATCHED_SOMETHING (reg_info[regnum]) = 1; \
+          regnum++; \
+        } \
+    } \
+  while (0)
+
+
+/* This converts PTR, a pointer into one of the search strings `string1'
+ and `string2', into an offset into the virtual concatenation of the
+ two: a pointer into `string1' yields its distance from `string1', a
+ pointer into `string2' yields its distance from `string2' plus
+ `size1'. */
+#define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
+
+/* Registers are set to a sentinel when they haven't yet matched. */
+#define REG_UNSET_VALUE ((char *) -1)
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+
+/* Macros for dealing with the split strings in re_match_2.
+ MATCHING_IN_FIRST_STRING is true while `dend', the end of the string
+ currently being scanned, is the first string's limit `end_match_1'. */
+
+#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
+
+/* Call before fetching a character with *d. This switches over to
+ string2 if necessary. If there is nothing left to fetch at all, it
+ jumps to the label `fail'. */
+#define PREFETCH() \
+ while (d == dend) \
+ { \
+ /* End of string2 => fail. */ \
+ if (dend == end_match_2) \
+ goto fail; \
+ /* End of string1 => advance to string2. */ \
+ d = string2; \
+ dend = end_match_2; \
+ }
+
+
+/* Test if at very beginning or at very end of the virtual concatenation
+ of `string1' and `string2'. If only one string, it's `string2'.
+ Note that AT_STRINGS_BEG is also true whenever `size2' is zero. */
+#define AT_STRINGS_BEG() (d == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END() (d == end2)
+
+
+/* Test if D points to a character which is word-constituent. We have
+ two special cases to check for: if past the end of string1, look at
+ the first character in string2; and if before the beginning of
+ string2, look at the last character in string1.
+
+ Assumes `string1' exists, so use in conjunction with AT_STRINGS_BEG (). */
+#define LETTER_P(d) \
+ (SYNTAX ((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == Sword)
+
+/* Test if the character before D and the one at D differ with respect
+ to being word-constituent. The very beginning and the very end of
+ the strings always count as a boundary. */
+#define AT_WORD_BOUNDARY(d) \
+ (AT_STRINGS_BEG () || AT_STRINGS_END () || LETTER_P (d - 1) != LETTER_P (d))
+
+
+/* Free everything we malloc.  */
+#ifdef REGEX_MALLOC
+/* Free VAR if it is non-null, then reset it to NULL.  The expansion is
+   wrapped in `do ... while (0)' so that it is a single statement: both
+   the free and the reset stay together even when the macro is the body
+   of an unbraced `if' or `else'.  Use it followed by a semicolon.  */
+#define FREE_VAR(var) \
+  do \
+    { \
+      if (var) \
+        free (var); \
+      (var) = NULL; \
+    } \
+  while (0)
+/* Release every work area re_match_2 allocates, leaving each pointer
+   NULL.  Relies on those variables being in scope.  */
+#define FREE_VARIABLES() \
+  do { \
+    FREE_VAR (fail_stack.stack); \
+    FREE_VAR (regstart); \
+    FREE_VAR (regend); \
+    FREE_VAR (old_regstart); \
+    FREE_VAR (old_regend); \
+    FREE_VAR (best_regstart); \
+    FREE_VAR (best_regend); \
+    FREE_VAR (reg_info); \
+    FREE_VAR (reg_dummy); \
+    FREE_VAR (reg_info_dummy); \
+  } while (0)
+#else /* not REGEX_MALLOC */
+/* Some MIPS systems (at least) want this to free alloca'd storage.  */
+#define FREE_VARIABLES() alloca (0)
+#endif /* not REGEX_MALLOC */
+
+
+/* These values must meet several constraints. They must not be valid
+ register values; since we have a limit of 255 registers (because
+ we use only one byte in the pattern for the register number), we can
+ use numbers larger than 255. They must differ by 1, because of
+ NUM_FAILURE_ITEMS above. And the value for the lowest register must
+ be larger than the value for the highest register, so we do not try
+ to actually save any registers when none are active.
+
+ With the definitions below, highest - lowest + 1 is zero, so
+ NUM_FAILURE_ITEMS then counts only the non-register items. */
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
+
+/* Matching routines. */
+
+#ifndef emacs /* Emacs never uses this. */
+/* Single-string front end to re_match_2: match the pattern in BUFP
+   against STRING, of length SIZE, starting at POS.  Matching may run to
+   the end of STRING.  The result is whatever re_match_2 returns.  */
+
+int
+re_match (bufp, string, size, pos, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size;
+     int pos;
+     struct re_registers *regs;
+{
+  int matched;
+
+  /* There is no first string; STRING is handed over as the second
+     string, and the stop position is its full length.  */
+  matched = re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
+  return matched;
+}
+#endif /* not emacs */
+
+
+/* re_match_2 matches the compiled pattern in BUFP against the
+ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
+ and SIZE2, respectively). We start matching at POS, and stop
+ matching at STOP.
+
+ If REGS is non-null and the `no_sub' field of BUFP is zero, we
+ store offsets for the substring each group matched in REGS. See the
+ documentation for exactly how many groups we fill.
+
+ We return -1 if no match, -2 if an internal error (such as the
+ failure stack overflowing). Otherwise, we return the length of the
+ matched substring. */
+
+int
+re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+ int pos;
+ struct re_registers *regs;
+ int stop;
+{
+ /* General temporaries. */
+ int mcnt;
+ unsigned char *p1;
+
+ /* Just past the end of the corresponding string. */
+ const char *end1, *end2;
+
+ /* Pointers into string1 and string2, just past the last characters in
+ each to consider matching. */
+ const char *end_match_1, *end_match_2;
+
+ /* Where we are in the data, and the end of the current string. */
+ const char *d, *dend;
+
+ /* Where we are in the pattern, and the end of the pattern. */
+ unsigned char *p = bufp->buffer;
+ register unsigned char *pend = p + bufp->used;
+
+ /* We use this to map every character in the string. */
+ char *translate = bufp->translate;
+
+ /* Failure point stack. Each place that can handle a failure further
+ down the line pushes a failure point on this stack. It consists of
+ regstart, regend, and reg_info for all registers corresponding to
+ the subexpressions we're currently inside, plus the number of such
+ registers, and, finally, two char *'s. The first char * is where
+ to resume scanning the pattern; the second one is where to resume
+ scanning the strings. If the latter is zero, the failure point is
+ a ``dummy''; if a failure happens and the failure point is a dummy,
+ it gets discarded and the next one is tried. */
+ fail_stack_type fail_stack;
+#ifdef DEBUG
+ static unsigned failure_id = 0;
+#endif
+
+ /* We fill all the registers internally, independent of what we
+ return, for use in backreferences. The number here includes
+ an element for register zero. */
+ unsigned num_regs = bufp->re_nsub + 1;
+
+ /* The currently active registers. */
+ unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+
+ /* Information on the contents of registers. These are pointers into
+ the input strings; they record just what was matched (on this
+ attempt) by a subexpression part of the pattern, that is, the
+ regnum-th regstart pointer points to where in the string we began
+ matching and the regnum-th regend points to right after where we
+ stopped matching the regnum-th subexpression. (The zeroth register
+ keeps track of what the whole pattern matches.) */
+ const char **regstart, **regend;
+
+ /* If a group that's operated upon by a repetition operator fails to
+ match anything, then the register for its start will need to be
+ restored because it will have been set to wherever in the string we
+ are when we last see its open-group operator. Similarly for a
+ register's end. */
+ const char **old_regstart, **old_regend;
+
+ /* The is_active field of reg_info helps us keep track of which (possibly
+ nested) subexpressions we are currently in. The matched_something
+ field of reg_info[reg_num] helps us tell whether or not we have
+ matched any of the pattern so far this time through the reg_num-th
+ subexpression. These two fields get reset each time through any
+ loop their register is in. */
+ register_info_type *reg_info;
+
+ /* The following record the register info as found in the above
+ variables when we find a match better than any we've seen before.
+ This happens as we backtrack through the failure points, which in
+ turn happens only if we have not yet matched the entire string. */
+ unsigned best_regs_set = false;
+ const char **best_regstart, **best_regend;
+
+ /* Logically, this is `best_regend[0]'. But we don't want to have to
+ allocate space for that if we're not allocating space for anything
+ else (see below). Also, we never need info about register 0 for
+ any of the other register vectors, and it seems rather a kludge to
+ treat `best_regend' differently than the rest. So we keep track of
+ the end of the best match so far in a separate variable. We
+ initialize this to NULL so that when we backtrack the first time
+ and need to test it, it's not garbage. */
+ const char *match_end = NULL;
+
+ /* Used when we pop values we don't care about. */
+ const char **reg_dummy;
+ register_info_type *reg_info_dummy;
+
+#ifdef DEBUG
+ /* Counts the total number of registers pushed. */
+ unsigned num_regs_pushed = 0;
+#endif
+
+ DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
+
+ INIT_FAIL_STACK ();
+
+ /* Do not bother to initialize all the register variables if there are
+ no groups in the pattern, as it takes a fair amount of time. If
+ there are groups, we include space for register 0 (the whole
+ pattern), even though we never use it, since it simplifies the
+ array indexing. We should fix this. */
+ if (bufp->re_nsub)
+ {
+ regstart = REGEX_TALLOC (num_regs, const char *);
+ regend = REGEX_TALLOC (num_regs, const char *);
+ old_regstart = REGEX_TALLOC (num_regs, const char *);
+ old_regend = REGEX_TALLOC (num_regs, const char *);
+ best_regstart = REGEX_TALLOC (num_regs, const char *);
+ best_regend = REGEX_TALLOC (num_regs, const char *);
+ reg_info = REGEX_TALLOC (num_regs, register_info_type);
+ reg_dummy = REGEX_TALLOC (num_regs, const char *);
+ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
+
+ if (!(regstart && regend && old_regstart && old_regend && reg_info
+ && best_regstart && best_regend && reg_dummy && reg_info_dummy))
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+#ifdef REGEX_MALLOC
+ else
+ {
+ /* We must initialize all our variables to NULL, so that
+ `FREE_VARIABLES' doesn't try to free them. Too bad this isn't
+ Lisp, so we could have a list of variables. As it is, we have to
+ assign each one explicitly. */
+ regstart = regend = old_regstart = old_regend = best_regstart
+ = best_regend = reg_dummy = NULL;
+ reg_info = reg_info_dummy = (register_info_type *) NULL;
+ }
+#endif /* REGEX_MALLOC */
+
+ /* The starting position is bogus. */
+ if (pos < 0 || pos > size1 + size2)
+ {
+ FREE_VARIABLES ();
+ return -1;
+ }
+
+ /* Initialize subexpression text positions to -1 to mark ones that no
+ start_memory/stop_memory has been seen for. Also initialize the
+ register information struct. */
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = regend[mcnt]
+ = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
+
+ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+ IS_ACTIVE (reg_info[mcnt]) = 0;
+ MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ }
+
+ /* We move `string1' into `string2' if the latter's empty -- but not if
+ `string1' is null. */
+ if (size2 == 0 && string1 != NULL)
+ {
+ string2 = string1;
+ size2 = size1;
+ string1 = 0;
+ size1 = 0;
+ }
+ end1 = string1 + size1;
+ end2 = string2 + size2;
+
+ /* Compute where to stop matching, within the two strings. */
+ if (stop <= size1)
+ {
+ end_match_1 = string1 + stop;
+ end_match_2 = string2;
+ }
+ else
+ {
+ end_match_1 = end1;
+ end_match_2 = string2 + stop - size1;
+ }
+
+ /* `p' scans through the pattern as `d' scans through the data.
+ `dend' is the end of the input string that `d' points within. `d'
+ is advanced into the following input string whenever necessary, but
+ this happens before fetching; therefore, at the beginning of the
+ loop, `d' can be pointing at the end of a string, but it cannot
+ equal `string2'. */
+ if (size1 > 0 && pos <= size1)
+ {
+ d = string1 + pos;
+ dend = end_match_1;
+ }
+ else
+ {
+ d = string2 + pos - size1;
+ dend = end_match_2;
+ }
+
+ DEBUG_PRINT1 ("The compiled pattern is: ");
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
+ DEBUG_PRINT1 ("The string to match is: `");
+ DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
+ DEBUG_PRINT1 ("'\n");
+
+ /* This loops over pattern commands. It exits by returning from the
+ function if the match is complete, or it drops through if the match
+ fails at this starting point in the input data. */
+ for (;;)
+ {
+ DEBUG_PRINT2 ("\n0x%x: ", p);
+
+ if (p == pend)
+ { /* End of pattern means we might have succeeded. */
+ DEBUG_PRINT1 ("End of pattern: ");
+ /* If not end of string, try backtracking. Otherwise done. */
+ if (d != end_match_2)
+ {
+ DEBUG_PRINT1 ("backtracking.\n");
+
+ if (!FAIL_STACK_EMPTY ())
+ { /* More failure points to try. */
+ boolean same_str_p = (FIRST_STRING_P (match_end)
+ == MATCHING_IN_FIRST_STRING);
+
+ /* If exceeds best match so far, save it. */
+ if (!best_regs_set
+ || (same_str_p && d > match_end)
+ || (!same_str_p && !MATCHING_IN_FIRST_STRING))
+ {
+ best_regs_set = true;
+ match_end = d;
+
+ DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
+
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ best_regstart[mcnt] = regstart[mcnt];
+ best_regend[mcnt] = regend[mcnt];
+ }
+ }
+ goto fail;
+ }
+
+ /* If no failure points, don't restore garbage. */
+ else if (best_regs_set)
+ {
+ restore_best_regs:
+ /* Restore best match. It may happen that `dend ==
+ end_match_1' while the restored d is in string2.
+ For example, the pattern `x.*y.*z' against the
+ strings `x-' and `y-z-', if the two strings are
+ not consecutive in memory. */
+ d = match_end;
+ dend = ((d >= string1 && d <= end1)
+ ? end_match_1 : end_match_2);
+
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = best_regstart[mcnt];
+ regend[mcnt] = best_regend[mcnt];
+ }
+ }
+ } /* d != end_match_2 */
+
+ DEBUG_PRINT1 ("\nAccepting match.\n");
+
+ /* If caller wants register contents data back, do it. */
+ if (regs && !bufp->no_sub)
+ {
+ /* Have the register data arrays been allocated? */
+ if (bufp->regs_allocated == REGS_UNALLOCATED)
+ { /* No. So allocate them with malloc. We need one
+ extra element beyond `num_regs' for the `-1' marker
+ GNU code uses. */
+ regs->num_regs = MAX (RE_NREGS, num_regs + 1);
+ regs->start = TALLOC (regs->num_regs, regoff_t);
+ regs->end = TALLOC (regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ return -2;
+ bufp->regs_allocated = REGS_REALLOCATE;
+ }
+ else if (bufp->regs_allocated == REGS_REALLOCATE)
+ { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < num_regs + 1)
+ {
+ regs->num_regs = num_regs + 1;
+ RETALLOC (regs->start, regs->num_regs, regoff_t);
+ RETALLOC (regs->end, regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ return -2;
+ }
+ }
+ else
+ assert (bufp->regs_allocated == REGS_FIXED);
+
+ /* Convert the pointer data in `regstart' and `regend' to
+ indices. Register zero has to be set differently,
+ since we haven't kept track of any info for it. */
+ if (regs->num_regs > 0)
+ {
+ regs->start[0] = pos;
+ regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
+ : d - string2 + size1);
+ }
+
+ /* Go through the first `min (num_regs, regs->num_regs)'
+ registers, since that is all we initialized. */
+ for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
+ {
+ if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ else
+ {
+ regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
+ regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
+ }
+ }
+
+ /* If the regs structure we return has more elements than
+ were in the pattern, set the extra elements to -1. If
+ we (re)allocated the registers, this is the case,
+ because we always allocate enough to have at least one
+ -1 at the end. */
+ for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ } /* regs && !bufp->no_sub */
+
+ FREE_VARIABLES ();
+ DEBUG_PRINT2 ("%d registers pushed.\n", num_regs_pushed);
+
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+ ? string1
+ : string2 - size1);
+
+ DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+
+ return mcnt;
+ }
+
+ /* Otherwise match next pattern command. */
+#ifdef SWITCH_ENUM_BUG
+ switch ((int) ((re_opcode_t) *p++))
+#else
+ switch ((re_opcode_t) *p++)
+#endif
+ {
+ /* Ignore these. Used to ignore the n of succeed_n's which
+ currently have n == 0. */
+ case no_op:
+ DEBUG_PRINT1 ("EXECUTING no_op.\n");
+ break;
+
+
+ /* Match the next n pattern characters exactly. The following
+ byte in the pattern defines n, and the n bytes after that
+ are the characters to match. */
+ case exactn:
+ mcnt = *p++;
+ DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
+
+ /* This is written out as an if-else so we don't waste time
+ testing `translate' inside the loop. */
+ if (translate)
+ {
+ do
+ {
+ PREFETCH ();
+ if (translate[(unsigned char) *d++] != (char) *p++)
+ goto fail;
+ }
+ while (--mcnt);
+ }
+ else
+ {
+ do
+ {
+ PREFETCH ();
+ if (*d++ != (char) *p++) goto fail;
+ }
+ while (--mcnt);
+ }
+ SET_REGS_MATCHED ();
+ break;
+
+
+ /* Match any character except possibly a newline or a null. */
+ case anychar:
+ DEBUG_PRINT1 ("EXECUTING anychar.\n");
+
+ PREFETCH ();
+
+ if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
+ || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
+ goto fail;
+
+ SET_REGS_MATCHED ();
+ DEBUG_PRINT2 (" Matched `%d'.\n", *d);
+ d++;
+ break;
+
+
+ case charset:
+ case charset_not:
+ {
+ register unsigned char c;
+ boolean not = (re_opcode_t) *(p - 1) == charset_not;
+
+ DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+
+ PREFETCH ();
+ c = TRANSLATE (*d); /* The character to match. */
+
+ /* Cast to `unsigned' instead of `unsigned char' in case the
+ bit list is a full 32 bytes long. */
+ if (c < (unsigned) (*p * BYTEWIDTH)
+ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ p += 1 + *p;
+
+ if (!not) goto fail;
+
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+ }
+
+
+ /* The beginning of a group is represented by start_memory.
+ The arguments are the register number in the next byte, and the
+ number of groups inner to this one in the next. The text
+ matched within the group is recorded (in the internal
+ registers data structure) under the register number. */
+ case start_memory:
+ DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
+
+ /* Find out if this group can match the empty string. */
+ p1 = p; /* To send to group_match_null_string_p. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[*p])
+ = group_match_null_string_p (&p1, pend, reg_info);
+
+ /* Save the position in the string where we were the last time
+ we were at this open-group operator in case the group is
+ operated upon by a repetition operator, e.g., with `(a*)*b'
+ against `ab'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
+ : regstart[*p];
+ DEBUG_PRINT2 (" old_regstart: %d\n",
+ POINTER_TO_OFFSET (old_regstart[*p]));
+
+ regstart[*p] = d;
+ DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
+
+ IS_ACTIVE (reg_info[*p]) = 1;
+ MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* This is the new highest active register. */
+ highest_active_reg = *p;
+
+ /* If nothing was active before, this is the new lowest active
+ register. */
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *p;
+
+ /* Move past the register number and inner group count. */
+ p += 2;
+ break;
+
+
+ /* The stop_memory opcode represents the end of a group. Its
+ arguments are the same as start_memory's: the register
+ number, and the number of inner groups. */
+ case stop_memory:
+ DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
+
+ /* We need to save the string position the last time we were at
+ this close-group operator in case the group is operated
+ upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+ against `aba'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regend[*p]) ? d : regend[*p]
+ : regend[*p];
+ DEBUG_PRINT2 (" old_regend: %d\n",
+ POINTER_TO_OFFSET (old_regend[*p]));
+
+ regend[*p] = d;
+ DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
+
+ /* This register isn't active anymore. */
+ IS_ACTIVE (reg_info[*p]) = 0;
+
+ /* If this was the only register active, nothing is active
+ anymore. */
+ if (lowest_active_reg == highest_active_reg)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ { /* We must scan for the new highest active register, since
+ it isn't necessarily one less than now: consider
+ (a(b)c(d(e)f)g). When group 3 ends, after the f), the
+ new highest active register is 1. */
+ unsigned char r = *p - 1;
+ while (r > 0 && !IS_ACTIVE (reg_info[r]))
+ r--;
+
+ /* If we end up at register zero, that means that we saved
+ the registers as the result of an `on_failure_jump', not
+ a `start_memory', and we jumped to past the innermost
+ `stop_memory'. For example, in ((.)*) we save
+ registers 1 and 2 as a result of the *, but when we pop
+ back to the second ), we are at the stop_memory 1.
+ Thus, nothing is active. */
+ if (r == 0)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ highest_active_reg = r;
+ }
+
+ /* If just failed to match something this time around with a
+ group that's operated on by a repetition operator, try to
+ force exit from the ``loop,'' and restore the register
+ information for this group that we had before trying this
+ last match. */
+ if ((!MATCHED_SOMETHING (reg_info[*p])
+ || (re_opcode_t) p[-3] == start_memory)
+ && (p + 2) < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ p1 = p + 2;
+ mcnt = 0;
+ switch ((re_opcode_t) *p1++)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (is_a_jump_n)
+ p1 += 2;
+ break;
+
+ default:
+ /* do nothing */ ;
+ }
+ p1 += mcnt;
+
+ /* If the next operation is a jump backwards in the pattern
+ to an on_failure_jump right before the start_memory
+ corresponding to this stop_memory, exit from the loop
+ by forcing a failure after pushing on the stack the
+ on_failure_jump's jump in the pattern, and d. */
+ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
+ && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
+ {
+ /* If this group ever matched anything, then restore
+ what its registers were before trying this last
+ failed match, e.g., with `(a*)*b' against `ab' for
+ regstart[1], and, e.g., with `((a*)*(b*)*)*'
+ against `aba' for regend[3].
+
+ Also restore the registers for inner groups for,
+ e.g., `((a*)(b*))*' against `aba' (register 3 would
+ otherwise get trashed). */
+
+ if (EVER_MATCHED_SOMETHING (reg_info[*p]))
+ {
+ unsigned r;
+
+ EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Restore this and inner groups' (if any) registers. */
+ for (r = *p; r < *p + *(p + 1); r++)
+ {
+ regstart[r] = old_regstart[r];
+
+ /* xx why this test? */
+ if ((int) old_regend[r] >= (int) regstart[r])
+ regend[r] = old_regend[r];
+ }
+ }
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
+
+ goto fail;
+ }
+ }
+
+ /* Move past the register number and the inner group count. */
+ p += 2;
+ break;
+
+
+ /* \<digit> has been turned into a `duplicate' command which is
+ followed by the numeric value of <digit> as the register number. */
+ case duplicate:
+ {
+ register const char *d2, *dend2;
+ int regno = *p++; /* Get which register to match against. */
+ DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
+
+ /* Can't back reference a group which we've never matched. */
+ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
+ goto fail;
+
+ /* Where in input to try to start matching. */
+ d2 = regstart[regno];
+
+ /* Where to stop matching; if both the place to start and
+ the place to stop matching are in the same string, then
+ set to the place to stop, otherwise, for now have to use
+ the end of the first string. */
+
+ dend2 = ((FIRST_STRING_P (regstart[regno])
+ == FIRST_STRING_P (regend[regno]))
+ ? regend[regno] : end_match_1);
+ for (;;)
+ {
+ /* If necessary, advance to next segment in register
+ contents. */
+ while (d2 == dend2)
+ {
+ if (dend2 == end_match_2) break;
+ if (dend2 == regend[regno]) break;
+
+ /* End of string1 => advance to string2. */
+ d2 = string2;
+ dend2 = regend[regno];
+ }
+ /* At end of register contents => success */
+ if (d2 == dend2) break;
+
+ /* If necessary, advance to next segment in data. */
+ PREFETCH ();
+
+ /* How many characters left in this segment to match. */
+ mcnt = dend - d;
+
+ /* Want how many consecutive characters we can match in
+ one shot, so, if necessary, adjust the count. */
+ if (mcnt > dend2 - d2)
+ mcnt = dend2 - d2;
+
+ /* Compare that many; failure if mismatch, else move
+ past them. */
+ if (translate
+ ? bcmp_translate (d, d2, mcnt, translate)
+ : bcmp (d, d2, mcnt))
+ goto fail;
+ d += mcnt, d2 += mcnt;
+ }
+ }
+ break;
+
+
+ /* begline matches the empty string at the beginning of the string
+ (unless `not_bol' is set in `bufp'), and, if
+ `newline_anchor' is set, after newlines. */
+ case begline:
+ DEBUG_PRINT1 ("EXECUTING begline.\n");
+
+ if (AT_STRINGS_BEG ())
+ {
+ if (!bufp->not_bol) break;
+ }
+ else if (d[-1] == '\n' && bufp->newline_anchor)
+ {
+ break;
+ }
+ /* In all other cases, we fail. */
+ goto fail;
+
+
+ /* endline is the dual of begline. */
+ case endline:
+ DEBUG_PRINT1 ("EXECUTING endline.\n");
+
+ if (AT_STRINGS_END ())
+ {
+ if (!bufp->not_eol) break;
+ }
+
+ /* We have to ``prefetch'' the next character. */
+ else if ((d == end1 ? *string2 : *d) == '\n'
+ && bufp->newline_anchor)
+ {
+ break;
+ }
+ goto fail;
+
+
+ /* Match at the very beginning of the data. */
+ case begbuf:
+ DEBUG_PRINT1 ("EXECUTING begbuf.\n");
+ if (AT_STRINGS_BEG ())
+ break;
+ goto fail;
+
+
+ /* Match at the very end of the data. */
+ case endbuf:
+ DEBUG_PRINT1 ("EXECUTING endbuf.\n");
+ if (AT_STRINGS_END ())
+ break;
+ goto fail;
+
+
+ /* on_failure_keep_string_jump is used to optimize `.*\n'. It
+ pushes NULL as the value for the string on the stack. Then
+ `pop_failure_point' will keep the current value for the
+ string, instead of restoring it. To see why, consider
+ matching `foo\nbar' against `.*\n'. The .* matches the foo;
+ then the . fails against the \n. But the next thing we want
+ to do is match the \n against the \n; if we restored the
+ string value, we would be back at the foo.
+
+ Because this is used only in specific cases, we don't need to
+ check all the things that `on_failure_jump' does, to make
+ sure the right things get saved on the stack. Hence we don't
+ share its code. The only reason to push anything on the
+ stack at all is that otherwise we would have to change
+ `anychar's code to do something besides goto fail in this
+ case; that seems worse than this. */
+ case on_failure_keep_string_jump:
+ DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
+
+ PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
+ break;
+
+
+ /* Uses of on_failure_jump:
+
+ Each alternative starts with an on_failure_jump that points
+ to the beginning of the next alternative. Each alternative
+ except the last ends with a jump that in effect jumps past
+ the rest of the alternatives. (They really jump to the
+ ending jump of the following alternative, because tensioning
+ these jumps is a hassle.)
+
+ Repeats start with an on_failure_jump that points past both
+ the repetition text and either the following jump or
+ pop_failure_jump back to this on_failure_jump. */
+ case on_failure_jump:
+ on_failure:
+ DEBUG_PRINT1 ("EXECUTING on_failure_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
+
+ /* If this on_failure_jump comes right before a group (i.e.,
+ the original * applied to a group), save the information
+ for that group and all inner ones, so that if we fail back
+ to this point, the group's information will be correct.
+ For example, in \(a*\)*\1, we only need the preceding group,
+ and in \(\(a*\)b*\)\2, we need the inner group. */
+
+ /* We can't use `p' to check ahead because we push
+ a failure point to `p + mcnt' after we do this. */
+ p1 = p;
+
+ /* We need to skip no_op's before we look for the
+ start_memory in case this on_failure_jump is happening as
+ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
+ against aba. */
+ while (p1 < pend && (re_opcode_t) *p1 == no_op)
+ p1++;
+
+ if (p1 < pend && (re_opcode_t) *p1 == start_memory)
+ {
+ /* We have a new highest active register now. This will
+ get reset at the start_memory we are about to get to,
+ but we will have saved all the registers relevant to
+ this repetition op, as described above. */
+ highest_active_reg = *(p1 + 1) + *(p1 + 2);
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *(p1 + 1);
+ }
+
+ DEBUG_PRINT1 (":\n");
+ PUSH_FAILURE_POINT (p + mcnt, d, -2);
+ break;
+
+
+ /* A smart repeat ends with a maybe_pop_jump.
+ We change it either to a pop_failure_jump or a jump. */
+ case maybe_pop_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
+ {
+ register unsigned char *p2 = p;
+
+ /* Compare the beginning of the repeat with what in the
+ pattern follows its end. If we can establish that there
+ is nothing that they would both match, i.e., that we
+ would have to backtrack because of (as in, e.g., `a*a')
+ then we can change to pop_failure_jump, because we'll
+ never have to backtrack.
+
+ This is not true in the case of alternatives: in
+ `(a|ab)*' we do need to backtrack to the `ab' alternative
+ (e.g., if the string was `ab'). But instead of trying to
+ detect that here, the alternative has put on a dummy
+ failure point which is what we will end up popping. */
+
+ /* Skip over open/close-group commands. */
+ while (p2 + 2 < pend
+ && ((re_opcode_t) *p2 == stop_memory
+ || (re_opcode_t) *p2 == start_memory))
+ p2 += 3; /* Skip over args, too. */
+
+ /* If we're at the end of the pattern, we can change. */
+ if (p2 == pend)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" End of pattern: change to `pop_failure_jump'.\n");
+ }
+
+ else if ((re_opcode_t) *p2 == exactn
+ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
+ {
+ register unsigned char c
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];
+ p1 = p + mcnt;
+
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_finalize_jump' of this case. Examine what
+ follows. */
+ if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
+ p[-3] = (unsigned char) pop_failure_jump;
+ else if ((re_opcode_t) p1[3] == charset
+ || (re_opcode_t) p1[3] == charset_not)
+ {
+ int not = (re_opcode_t) p1[3] == charset_not;
+
+ if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ /* `not' is equal to 1 if c would match, which means
+ that we can't change to pop_failure_jump. */
+ if (!not)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" No match: change to `pop_failure_jump'.\n");
+
+ }
+ }
+ }
+ }
+ p -= 2; /* Point at relative address again. */
+ if ((re_opcode_t) p[-1] != pop_failure_jump)
+ {
+ p[-1] = (unsigned char) jump;
+ goto unconditional_jump;
+ }
+ /* Note fall through. */
+
+
+ /* The end of a simple repeat has a pop_failure_jump back to
+ its matching on_failure_jump, where the latter will push a
+ failure point. The pop_failure_jump takes off failure
+ points put on by this pop_failure_jump's matching
+ on_failure_jump; we got through the pattern to here from the
+ matching on_failure_jump, so didn't fail. */
+ case pop_failure_jump:
+ {
+ /* We need to pass separate storage for the lowest and
+ highest registers, even though we don't care about the
+ actual values. Otherwise, we will restore only one
+ register from the stack, since lowest will == highest in
+ `pop_failure_point'. */
+ unsigned dummy_low_reg, dummy_high_reg;
+ unsigned char *pdummy;
+ const char *sdummy;
+
+ DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
+ POP_FAILURE_POINT (sdummy, pdummy,
+ dummy_low_reg, dummy_high_reg,
+ reg_dummy, reg_dummy, reg_info_dummy);
+ }
+ /* Note fall through. */
+
+
+ /* Unconditionally jump (without popping any failure points). */
+ case jump:
+ unconditional_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
+ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
+ p += mcnt; /* Do the jump. */
+ DEBUG_PRINT2 ("(to 0x%x).\n", p);
+ break;
+
+
+ /* We need this opcode so we can detect where alternatives end
+ in `group_match_null_string_p' et al. */
+ case jump_past_alt:
+ DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
+ goto unconditional_jump;
+
+
+ /* Normally, the on_failure_jump pushes a failure point, which
+ then gets popped at pop_failure_jump. We will end up at
+ pop_failure_jump, also, and with a pattern of, say, `a+', we
+ are skipping over the on_failure_jump, so we have to push
+ something meaningless for pop_failure_jump to pop. */
+ case dummy_failure_jump:
+ DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
+ /* It doesn't matter what we push for the string here. What
+ the code at `fail' tests is the value for the pattern. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ goto unconditional_jump;
+
+
+ /* At the end of an alternative, we need to push a dummy failure
+ point in case we are followed by a `pop_failure_jump', because
+ we don't want the failure point for the alternative to be
+ popped. For example, matching `(a|ab)*' against `aab'
+ requires that we match the `ab' alternative. */
+ case push_dummy_failure:
+ DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
+ /* See comments just above at `dummy_failure_jump' about the
+ two zeroes. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ break;
+
+ /* Have to succeed matching what follows at least n times.
+ After that, handle like `on_failure_jump'. */
+ case succeed_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
+
+ assert (mcnt >= 0);
+ /* Originally, this is how many times we HAVE to succeed. */
+ if (mcnt > 0)
+ {
+ mcnt--;
+ p += 2;
+ STORE_NUMBER_AND_INCR (p, mcnt);
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
+ }
+ else if (mcnt == 0)
+ {
+ DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
+ p[2] = (unsigned char) no_op;
+ p[3] = (unsigned char) no_op;
+ goto on_failure;
+ }
+ break;
+
+ case jump_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
+
+ /* Originally, this is how many times we CAN jump. */
+ if (mcnt)
+ {
+ mcnt--;
+ STORE_NUMBER (p + 2, mcnt);
+ goto unconditional_jump;
+ }
+ /* If don't have to jump any more, skip over the rest of command. */
+ else
+ p += 4;
+ break;
+
+ case set_number_at:
+ {
+ DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p1 = p + mcnt;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
+ STORE_NUMBER (p1, mcnt);
+ break;
+ }
+
+ case wordbound:
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ break;
+ goto fail;
+
+ case notwordbound:
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ goto fail;
+ break;
+
+ case wordbeg:
+ DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+ if (LETTER_P (d) && (AT_STRINGS_BEG () || !LETTER_P (d - 1)))
+ break;
+ goto fail;
+
+ case wordend:
+ DEBUG_PRINT1 ("EXECUTING wordend.\n");
+ if (!AT_STRINGS_BEG () && LETTER_P (d - 1)
+ && (!LETTER_P (d) || AT_STRINGS_END ()))
+ break;
+ goto fail;
+
+#ifdef emacs
+#ifdef emacs19
+ case before_dot:
+ DEBUG_PRINT1 ("EXECUTING before_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) >= point)
+ goto fail;
+ break;
+
+ case at_dot:
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) != point)
+ goto fail;
+ break;
+
+ case after_dot:
+ DEBUG_PRINT1 ("EXECUTING after_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) <= point)
+ goto fail;
+ break;
+#else /* not emacs19 */
+ case at_dot:
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
+ goto fail;
+ break;
+#endif /* not emacs19 */
+
+ case syntaxspec:
+ DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchsyntax;
+
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING wordchar.\n");
+ mcnt = (int) Sword;
+ matchsyntax:
+ PREFETCH ();
+ if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+ case notsyntaxspec:
+ DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchnotsyntax;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING notwordchar.\n");
+ mcnt = (int) Sword;
+ matchnotsyntax: /* We goto here from notsyntaxspec. */
+ PREFETCH ();
+ if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+#else /* not emacs */
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+ PREFETCH ();
+ if (!LETTER_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+ PREFETCH ();
+ if (LETTER_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+#endif /* not emacs */
+
+ default:
+ abort ();
+ }
+ continue; /* Successfully executed one pattern command; keep going. */
+
+
+ /* We goto here if a matching operation fails. */
+ fail:
+ if (!FAIL_STACK_EMPTY ())
+ { /* A restart point is known. Restore to that state. */
+ DEBUG_PRINT1 ("\nFAIL:\n");
+ POP_FAILURE_POINT (d, p,
+ lowest_active_reg, highest_active_reg,
+ regstart, regend, reg_info);
+
+ /* If this failure point is a dummy, try the next one. */
+ if (!p)
+ goto fail;
+
+ /* If we failed to the end of the pattern, don't examine *p. */
+ assert (p <= pend);
+ if (p < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ /* If failed to a backwards jump that's part of a repetition
+ loop, need to pop this failure point and use the next one. */
+ switch ((re_opcode_t) *p)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case maybe_pop_jump:
+ case pop_failure_jump:
+ case jump:
+ p1 = p + 1;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+
+ if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+ || (!is_a_jump_n
+ && (re_opcode_t) *p1 == on_failure_jump))
+ goto fail;
+ break;
+ default:
+ /* do nothing */ ;
+ }
+ }
+
+ if (d >= string1 && d <= end1)
+ dend = end_match_1;
+ }
+ else
+ break; /* Matching at this starting point really fails. */
+ } /* for (;;) */
+
+ if (best_regs_set)
+ goto restore_best_regs;
+
+ FREE_VARIABLES ();
+
+ return -1; /* Failure to match. */
+} /* re_match_2 */
+
+/* Subroutine definitions for re_match_2. */
+
+
+/* We are passed P pointing to a register number after a start_memory.
+
+ Return true if the pattern up to the corresponding stop_memory can
+ match the empty string, and false otherwise. False is also returned
+ when END is reached without a stop_memory having been seen.
+
+ If we find the matching stop_memory, sets P to point to one past its number.
+ Otherwise, sets P to an undefined byte less than or equal to END.
+ (In this routine *P is stored to only in the stop_memory case.)
+
+ END bounds the scan of the compiled pattern. REG_INFO is handed on
+ unchanged to alt_match_null_string_p and common_op_match_null_string_p.
+
+ We don't handle duplicates properly (yet). */
+
+static boolean
+group_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt; /* Scratch for the two-byte numbers read from the pattern. */
+ /* Point to after the args to the start_memory: *P is at the register
+ number, so adding 2 skips it and the byte that follows it. */
+ unsigned char *p1 = *p + 2;
+
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and return true or
+ false, as appropriate, when we get to one that can't, or to the
+ matching stop_memory. */
+
+ switch ((re_opcode_t) *p1)
+ {
+ /* Could be either a loop or a series of alternatives. */
+ case on_failure_jump:
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ /* If the next operation is not a jump backwards in the
+ pattern. (For a negative offset nothing more is done here:
+ P1 has simply moved past the on_failure_jump and its number.) */
+
+ if (mcnt >= 0)
+ {
+ /* Go through the on_failure_jumps of the alternatives,
+ seeing if any of the alternatives cannot match nothing.
+ The last alternative starts with only a jump,
+ whereas the rest start with on_failure_jump and end
+ with a jump, e.g., here is the pattern for `a|b|c':
+
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+ /exactn/1/c
+
+ So, we have to first go through the first (n-1)
+ alternatives and then deal with the last one separately. */
+
+
+ /* Deal with the first (n-1) alternatives, which start
+ with an on_failure_jump (see above) that jumps to right
+ past a jump_past_alt. The test below looks three bytes
+ before the jump target, i.e., at the opcode whose two-byte
+ number ends just before the target. */
+
+ while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+ {
+ /* `mcnt' holds how many bytes long the alternative
+ is, including the ending `jump_past_alt' and
+ its number. */
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+ reg_info))
+ return false;
+
+ /* Move to right after this alternative, including the
+ jump_past_alt. */
+ p1 += mcnt;
+
+ /* Break if it's the beginning of an n-th alternative
+ that doesn't begin with an on_failure_jump. */
+ if ((re_opcode_t) *p1 != on_failure_jump)
+ break;
+
+ /* Still have to check that it's not an n-th
+ alternative that starts with an on_failure_jump. */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+ {
+ /* Get to the beginning of the n-th alternative, i.e.,
+ undo the three bytes just consumed. */
+ p1 -= 3;
+ break;
+ }
+ }
+
+ /* Deal with the last alternative: go back and get number
+ of the `jump_past_alt' just before it. `mcnt' contains
+ the length of the alternative. */
+ EXTRACT_NUMBER (mcnt, p1 - 2);
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+ return false;
+
+ p1 += mcnt; /* Get past the n-th alternative. */
+ } /* if mcnt >= 0 */
+ break;
+
+
+ case stop_memory:
+ assert (p1[1] == **p); /* Same register number as the start_memory. */
+ *p = p1 + 2; /* One past the register number. */
+ return true;
+
+
+ default:
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return false;
+ }
+ } /* while p1 < end */
+
+ return false;
+} /* group_match_null_string_p */
+
+
+/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
+ It expects P to be the first byte of a single alternative and END one
+ byte past the last. The alternative can contain groups.
+
+ Return true if every opcode from P up to END is one that can match
+ nothing, and false as soon as common_op_match_null_string_p rejects
+ one. REG_INFO is only passed through to that routine. P is taken
+ by value, so the caller's pointer is not changed. */
+
+static boolean
+alt_match_null_string_p (p, end, reg_info)
+ unsigned char *p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt; /* Jump offset read from an on_failure_jump. */
+ unsigned char *p1 = p;
+
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and break when we get
+ to one that can't. */
+
+ switch ((re_opcode_t) *p1)
+ {
+ /* It's a loop. Step past the opcode, read its offset, and go
+ straight to the jump target without examining what lies between. */
+ case on_failure_jump:
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ break;
+
+ default:
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return false;
+ }
+ } /* while p1 < end */
+
+ return true;
+} /* alt_match_null_string_p */
+
+
+/* Deals with the ops common to group_match_null_string_p and
+ alt_match_null_string_p.
+
+ Return true if the single op at *P can match the empty string, false
+ otherwise. END and REG_INFO are passed on when a nested group is
+ examined; REG_INFO is also where each group's cached
+ match-null-string value is read and stored.
+
+ Sets P to one after the op and its arguments, if any. *P is stored
+ to only when true is returned; every `return false' below leaves it
+ untouched. */
+
+static boolean
+common_op_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ boolean ret;
+ int reg_no;
+ unsigned char *p1 = *p;
+
+ switch ((re_opcode_t) *p1++)
+ {
+ case no_op: /* These ops take no arguments here and consume no text. */
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case wordbound:
+ case notwordbound:
+#ifdef emacs
+ case before_dot:
+ case at_dot:
+ case after_dot:
+#endif
+ break;
+
+ case start_memory:
+ reg_no = *p1;
+ assert (reg_no > 0 && reg_no <= MAX_REGNUM);
+ ret = group_match_null_string_p (&p1, end, reg_info);
+
+ /* Have to set this here in case we're checking a group which
+ contains a group and a back reference to it. An already-set
+ value is not overwritten. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+
+ if (!ret)
+ return false;
+ break;
+
+ /* If this is an optimized succeed_n for zero times, make the jump.
+ Only a non-negative offset is followed; a backward jump makes us
+ return false. */
+ case jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (mcnt >= 0)
+ p1 += mcnt;
+ else
+ return false;
+ break;
+
+ case succeed_n:
+ /* Get to the number of times to succeed (it follows the two-byte
+ jump offset). */
+ p1 += 2;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ if (mcnt == 0)
+ {
+ p1 -= 4; /* Back to the jump offset right after the opcode. */
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ }
+ else
+ return false;
+ break;
+
+ case duplicate: /* NOTE(review): P1 is not moved past the
+ register-number byte on this path, so on success *P is left
+ pointing at that byte -- confirm whether this is intended. */
+ if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+ return false;
+ break;
+
+ case set_number_at:
+ p1 += 4; /* NOTE(review): no `break' follows, so control falls into
+ `default' and returns false, and this increment is never used.
+ Confirm whether a `break' was intended. */
+
+ default:
+ /* All other opcodes mean we cannot match the empty string. */
+ return false;
+ }
+
+ *p = p1;
+ return true;
+} /* common_op_match_null_string_p */
+
+
+/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
+ bytes; nonzero otherwise.
+
+ Each byte of S1 and S2 is used as an index into TRANSLATE and the
+ two looked-up values are compared; the scan stops at the first
+ difference and returns 1. A LEN of zero returns 0 without reading
+ either string. */
+
+static int
+bcmp_translate (s1, s2, len, translate)
+ unsigned char *s1, *s2;
+ register int len;
+ char *translate;
+{
+ register unsigned char *p1 = s1, *p2 = s2;
+ while (len) /* LEN counts down the bytes still to compare. */
+ {
+ if (translate[*p1++] != translate[*p2++]) return 1;
+ len--;
+ }
+ return 0;
+}
+
+/* Entry points for GNU code. */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+ compiles PATTERN (of length LENGTH) and puts the result in BUFP.
+ Returns 0 if the pattern was valid, otherwise an error string.
+ (The value returned is the `re_error_msg' entry indexed by
+ regex_compile's return code.)
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+ are set in BUFP on entry. The `regs_allocated', `no_sub' and
+ `newline_anchor' fields of BUFP are overwritten here, and the syntax
+ used is the current value of `re_syntax_options'.
+
+ We call regex_compile to do the actual compilation. */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+ const char *pattern;
+ int length;
+ struct re_pattern_buffer *bufp;
+{
+ reg_errcode_t ret;
+
+ /* GNU code is written to assume at least RE_NREGS registers will be set
+ (and at least one extra will be -1). */
+ bufp->regs_allocated = REGS_UNALLOCATED;
+
+ /* And GNU code determines whether or not to get register information
+ by passing null for the REGS argument to re_match, etc., not by
+ setting no_sub. */
+ bufp->no_sub = 0;
+
+ /* Match anchors at newline. */
+ bufp->newline_anchor = 1;
+
+ ret = regex_compile (pattern, length, re_syntax_options, bufp);
+
+ return re_error_msg[(int) ret]; /* Map the error code to its message. */
+}
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them if this is an Emacs or POSIX compilation. */
+
+#if !defined (emacs) && !defined (_POSIX_SOURCE)
+
+/* BSD has one and only one pattern buffer. */
+static struct re_pattern_buffer re_comp_buf;
+
+char *
+re_comp (s)
+ const char *s;
+{ /* Compile the NUL-terminated pattern S into the static buffer
+ `re_comp_buf', using the syntax in `re_syntax_options'.
+
+ If S is null nothing is compiled: the result is 0 when a pattern
+ buffer has already been allocated, and the string "No previous
+ regular expression" when it has not.
+
+ Otherwise the result is "Memory exhausted" if a first-time
+ allocation fails, or else the `re_error_msg' entry indexed by
+ regex_compile's return code. */
+ reg_errcode_t ret;
+
+ if (!s)
+ {
+ if (!re_comp_buf.buffer)
+ return "No previous regular expression";
+ return 0;
+ }
+
+ if (!re_comp_buf.buffer) /* First use: allocate buffer and fastmap. */
+ {
+ re_comp_buf.buffer = (unsigned char *) malloc (200);
+ if (re_comp_buf.buffer == NULL)
+ return "Memory exhausted";
+ re_comp_buf.allocated = 200;
+
+ re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
+ if (re_comp_buf.fastmap == NULL)
+ return "Memory exhausted"; /* NOTE(review): `buffer' stays allocated
+ on this path, so later calls skip this whole block and `fastmap'
+ remains null -- confirm that is acceptable. */
+ }
+
+ /* Since `re_exec' always passes NULL for the `regs' argument, we
+ don't need to initialize the pattern buffer fields which affect it. */
+
+ /* Match anchors at newlines. */
+ re_comp_buf.newline_anchor = 1;
+
+ ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
+
+ /* Yes, we're discarding `const' here. */
+ return (char *) re_error_msg[(int) ret];
+}
+
+
+int
+re_exec (s)
+ const char *s;
+{ /* Search the NUL-terminated string S with the pattern held in
+ `re_comp_buf'. The search starts at offset 0 with a range equal to
+ the length of S, and no register information is requested.
+ Return 1 if re_search reports a non-negative result, 0 otherwise.
+ Nothing here checks that an earlier re_comp call succeeded. */
+ const int len = strlen (s);
+ return
+ 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
+}
+#endif /* not emacs and not _POSIX_SOURCE */
+
+/* POSIX.2 functions. Don't define these for Emacs. */
+
+#ifndef emacs
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' and `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we consider upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+int
+regcomp (preg, pattern, cflags)
+     regex_t *preg;
+     const char *pattern;
+     int cflags;
+{
+  reg_errcode_t ret;
+  unsigned syntax
+    = cflags & REG_EXTENDED ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
+
+  /* regex_compile will allocate the space for the compiled pattern.
+     PREG arrives uninitialized, so the size bookkeeping must be
+     cleared together with the pointer: `allocated' is one of the
+     fields the compiler accepts as input, and a garbage nonzero value
+     paired with a null `buffer' would describe space that does not
+     exist.  */
+  preg->buffer = 0;
+  preg->allocated = 0;
+  preg->used = 0;
+
+  /* Don't bother to use a fastmap when searching.  This simplifies the
+     REG_NEWLINE case: if we used a fastmap, we'd have to put all the
+     characters after newlines into the fastmap.  This way, we just try
+     every character.  */
+  preg->fastmap = 0;
+
+  if (cflags & REG_ICASE)
+    {
+      unsigned i;
+
+      preg->translate = (char *) malloc (CHAR_SET_SIZE);
+      if (preg->translate == NULL)
+        return (int) REG_ESPACE;
+
+      /* Map uppercase characters to corresponding lowercase ones.  */
+      for (i = 0; i < CHAR_SET_SIZE; i++)
+        preg->translate[i] = isupper (i) ? tolower (i) : i;
+    }
+  else
+    preg->translate = NULL;
+
+  /* If REG_NEWLINE is set, newlines are treated differently.  */
+  if (cflags & REG_NEWLINE)
+    { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      /* It also changes the matching behavior.  */
+      preg->newline_anchor = 1;
+    }
+  else
+    preg->newline_anchor = 0;
+
+  /* Normalize the flag to exactly 0 or 1 for the one-bit field.  */
+  preg->no_sub = !!(cflags & REG_NOSUB);
+
+  /* POSIX says a null character in the pattern terminates it, so we
+     can use strlen here in compiling the pattern.  */
+  ret = regex_compile (pattern, strlen (pattern), syntax, preg);
+
+  /* POSIX doesn't distinguish between an unmatched open-group and an
+     unmatched close-group: both are REG_EPAREN.  */
+  if (ret == REG_ERPAREN) ret = REG_EPAREN;
+
+  return (int) ret;
+}
+
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end.
+
+   We return 0 if we find a match, REG_NOMATCH if not, and REG_ESPACE
+   if the temporary register arrays cannot be allocated.  */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+     const regex_t *preg;
+     const char *string;
+     size_t nmatch;
+     regmatch_t pmatch[];
+     int eflags;
+{
+  int ret;
+  struct re_registers regs;
+  regex_t private_preg;
+  int len = strlen (string);
+  boolean want_reg_info = !preg->no_sub && nmatch > 0;
+
+  /* Work on a private copy so the per-call flags set below never
+     touch the caller's (const) pattern buffer.  */
+  private_preg = *preg;
+
+  private_preg.not_bol = !!(eflags & REG_NOTBOL);
+  private_preg.not_eol = !!(eflags & REG_NOTEOL);
+
+  /* The user has told us exactly how many registers to return
+     information about, via `nmatch'.  We have to pass that on to the
+     matching routines.  */
+  private_preg.regs_allocated = REGS_FIXED;
+
+  if (want_reg_info)
+    {
+      regs.num_regs = nmatch;
+      regs.start = TALLOC (nmatch, regoff_t);
+      regs.end = TALLOC (nmatch, regoff_t);
+      if (regs.start == NULL || regs.end == NULL)
+        {
+          /* Only one of the two allocations may have failed; release
+             whichever one succeeded instead of leaking it.  */
+          if (regs.start != NULL)
+            free (regs.start);
+          if (regs.end != NULL)
+            free (regs.end);
+          /* Running out of memory is not the same as "no match".  */
+          return (int) REG_ESPACE;
+        }
+    }
+
+  /* Perform the searching operation.  */
+  ret = re_search (&private_preg, string, len,
+                   /* start: */ 0, /* range: */ len,
+                   want_reg_info ? &regs : (struct re_registers *) 0);
+
+  /* Copy the register information to the POSIX structure.  */
+  if (want_reg_info)
+    {
+      if (ret >= 0)
+        {
+          unsigned r;
+
+          for (r = 0; r < nmatch; r++)
+            {
+              pmatch[r].rm_so = regs.start[r];
+              pmatch[r].rm_eo = regs.end[r];
+            }
+        }
+
+      /* If we needed the temporary register info, free the space now.  */
+      free (regs.start);
+      free (regs.end);
+    }
+
+  /* We want zero return to mean success, unlike `re_search'.  */
+  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+}
+
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+ from either regcomp or regexec. */
+
+/* Copy the message for ERRCODE into ERRBUF, truncating it to fit in
+   ERRBUF_SIZE bytes (the result is always null-terminated when
+   ERRBUF_SIZE is nonzero).  PREG is not used.  Returns the size needed
+   to hold the complete message, including the terminating null.  */
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+     int errcode;
+     const regex_t *preg;
+     char *errbuf;
+     size_t errbuf_size;
+{
+  const char *msg;
+  size_t msg_size;
+
+  /* REG_ERPAREN is the last member of reg_errcode_t; anything outside
+     0..REG_ERPAREN is not a code this library produces, so don't index
+     the message table with it.  (Assumes `re_error_msg' has one entry
+     per reg_errcode_t member, as regex.h asks maintainers to ensure.)  */
+  if (errcode < 0 || errcode > (int) REG_ERPAREN)
+    msg = "Unknown error";
+  else if (re_error_msg[errcode] == NULL)
+    msg = "Success";
+  else
+    msg = re_error_msg[errcode];
+
+  msg_size = strlen (msg) + 1; /* Includes the null.  */
+
+  if (errbuf_size != 0)
+    {
+      if (msg_size > errbuf_size)
+        {
+          /* Message doesn't fit: copy what does and terminate by hand,
+             since strncpy won't when it truncates.  */
+          strncpy (errbuf, msg, errbuf_size - 1);
+          errbuf[errbuf_size - 1] = 0;
+        }
+      else
+        strcpy (errbuf, msg);
+    }
+
+  return msg_size;
+}
+
+
+/* Free dynamically allocated space used by PREG. */
+
+/* Release the compiled pattern, fastmap and translate table owned by
+   PREG, and reset the corresponding fields so PREG holds no stale
+   pointers.  */
+void
+regfree (preg)
+     regex_t *preg;
+{
+  /* Compiled pattern and its size bookkeeping.  */
+  if (preg->buffer)
+    free (preg->buffer);
+  preg->buffer = NULL;
+  preg->used = 0;
+  preg->allocated = 0;
+
+  /* Fastmap, which can no longer be accurate once it is gone.  */
+  if (preg->fastmap)
+    free (preg->fastmap);
+  preg->fastmap_accurate = 0;
+  preg->fastmap = NULL;
+
+  /* Case-folding (or other) translate table.  */
+  if (preg->translate)
+    free (preg->translate);
+  preg->translate = NULL;
+}
+
+#endif /* not emacs */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/lib/regex.h b/lib/regex.h
new file mode 100644
index 000000000..87824ef47
--- /dev/null
+++ b/lib/regex.h
@@ -0,0 +1,481 @@
+/* Definitions for data structures and routines for the regular
+ expression library, version 0.11.
+
+ Copyright (C) 1985, 89, 90, 91, 92 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __REGEXP_LIBRARY_H__
+#define __REGEXP_LIBRARY_H__
+
+/* POSIX says that <sys/types.h> must be included before <regex.h>. */
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+/* Syntax bits for awk.  Note the alternation bit is spelled
+   RE_NO_BK_VBAR (| is the operator, \| is literal).  */
+#define RE_SYNTAX_AWK \
+  (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+   | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+   | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+   | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+#define RE_DUP_MAX ((1 << 15) - 1)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then . and [^...] do not match newline, and
+ anchors also match at newline characters in the string.
+ If not set, then anchors do not match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then returns differ between not matching and errors. */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ char *translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when regex_compile compiles a pattern; set to one
+ by re_compile_fastmap when it updates the fastmap, if any. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, regexec reports only success or failure and does not
+ return anything in pmatch. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+
+/* search.c (search_buffer) in Emacs needs this one opcode value. It is
+ defined both in `regex.c' and here. */
+#define RE_EXACTN_VALUE 1
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match. */
+struct re_registers
+{
+ unsigned num_regs;
+ regoff_t *start;
+ regoff_t *end;
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+/* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it.
+
+ We also have to undo `const' if we are not ANSI and if it hasn't
+ previously been taken care of. */
+
+#if __STDC__
+#define _RE_ARGS(args) args
+#else
+#define _RE_ARGS(args) ()
+#ifndef const
+#define const
+#endif
+#endif
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, int length,
+ struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if there was an
+ internal error. */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
+extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+/* 4.2 bsd compatibility. */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+
+/* POSIX compatibility. */
+extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+extern int regexec
+ _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int eflags));
+extern size_t regerror
+ _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+ size_t errbuf_size));
+extern void regfree _RE_ARGS ((regex_t *preg));
+
+#endif /* not __REGEXP_LIBRARY_H__ */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/old/textutils/ChangeLog b/old/textutils/ChangeLog
new file mode 100644
index 000000000..71d300dba
--- /dev/null
+++ b/old/textutils/ChangeLog
@@ -0,0 +1,855 @@
+Sat Nov 7 00:26:14 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
+
+ * wc.c (wc): If doing only -c, use st_size for regular files.
+
+ * fold.c (fold_file): Was folding 1 column too early.
+ From Eric Backus <ericb@lsid.hp.com>.
+
+ * memset.c: New file.
+
+Fri Nov 6 20:14:51 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
+
+ * cksum.c: New file.
+
+Tue Oct 13 16:24:06 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
+
+ * tac.c (tac_stdin): Handle SIGPIPE.
+ * sort.c (main): Handle SIGTERM.
+
+ * od.c: New file.
+
+ * system.h [USG || STDC_HEADERS]: Define bcmp.
+
+Sat Oct 3 20:41:24 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
+
+ * sort.c (main): Handle SIGPIPE. From trq@dionysos.thphys.ox.ac.uk.
+
+Tue Sep 29 01:10:05 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
+
+ * paste.c (main): Don't write on a string constant.
+
+Mon Aug 24 00:02:45 1992 Jim Meyering (meyering@churchy.gnu.ai.mit.edu)
+
+ * tr.c: Minor cleanup. Replaced some assert(0) with abort().
+
+Tue Jul 7 02:14:19 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
+
+ * cmp.c, cmp.1: Move to diff distribution.
+
+Fri Jul 3 16:37:59 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
+
+ * system.h: Change FOO_MISSING to HAVE_FOO.
+
+Wed May 13 20:05:41 1992 David J. MacKenzie (djm@churchy.gnu.ai.mit.edu)
+
+ * pr.c (COLUMN): Add structure member to remember filename.
+ (main, init_fps, open_file, close_file): Use it.
+
+ (close_file): Don't decrement cols_ready_to_print when closing
+ a file. From cdl@mpl.UCSD.EDU (Carl Lowenstein).
+
+Mon May 11 19:17:33 1992 David J. MacKenzie (djm@churchy.gnu.ai.mit.edu)
+
+ * cmp.c: --show-chars -> --print-chars.
+
+ * pr.c: Rename some variables.
+
+Sat May 9 18:39:47 1992 David J. MacKenzie (djm@wookumz.gnu.ai.mit.edu)
+
+ * system.h: Define DEV_BSIZE if not defined.
+
+Wed Apr 22 02:15:09 1992 David J. MacKenzie (djm@churchy.gnu.ai.mit.edu)
+
+ * system.h, tac.c: SIGTYPE -> RETSIGTYPE.
+
+Fri Apr 17 10:42:23 1992 David J. MacKenzie (djm@wookumz.gnu.ai.mit.edu)
+
+ * sort.c (main): Don't stop processing args when we hit "-";
+ treat it like a regular filename.
+ From ian@airs.com (Ian Lance Taylor).
+
+ * pr.c (print_page): Fix off by one line count when ^L is in input.
+ From Andreas Schwab (schwab@ls5.informatik.uni-dortmund.de).
+
+Mon Apr 6 20:52:29 1992 Jim Meyering (meyering@churchy.gnu.ai.mit.edu)
+
+ * tr.c (validate): Change error message so it doesn't mention
+ actual name of --truncate-set1 option. From David MacKenzie.
+
+Sun Apr 5 14:22:42 1992 Jim Meyering (meyering@hal.gnu.ai.mit.edu)
+
+ * tr.c (string2_extend, validate): Give an error message when
+ translating without --truncate-set1, with empty string2, and
+ with non-empty string1. "tr 1 ''" produced a failed assertion.
+
+Mon Mar 30 02:20:56 1992 David J. MacKenzie (djm@wookumz.gnu.ai.mit.edu)
+
+ * system.h: Change how ST_BLKSIZE is calculated to allow for
+ non-POSIX systems that don't define BSIZE in sys/param.h.
+
+Sat Mar 28 11:18:01 1992 David J. MacKenzie (djm@wookumz.gnu.ai.mit.edu)
+
+ * sum.c (main, bsd_sum_file): Don't print filename if BSD
+ algorithm is used and only one file was given.
+
+Wed Mar 25 11:34:41 1992 Jim Meyering (meyering@wombat.gnu.ai.mit.edu)
+
+ * tr.c (get_spec_stats): Fix assertion to allow ranges like a-a
+ with starting character equal to ending character. This is
+ contrary to the POSIX spec, but what is already implemented
+ in find_closing_delim.
+
+Mon Mar 16 00:15:11 1992 David J. MacKenzie (djm@wookumz.gnu.ai.mit.edu)
+
+ * Version 1.3.
+
+ * sort.c (numcompare, checkfp): Add parens to placate gcc2.
+
+ * sort.c (mergefps): For -u, output the first, not last, of
+ the lines that compare equal. From Mike Haertel.
+
+Tue Mar 10 10:51:38 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
+
+ * tr.c: Remove initial capitals and periods from error messages.
+
+Sun Mar 8 22:03:45 1992 David J. MacKenzie (djm@nutrimat.gnu.ai.mit.edu)
+
+ * sum.c (main): Add -r option for SYSV compat.
+
+Thu Feb 27 22:26:25 1992 David J. MacKenzie (djm@wookumz.gnu.ai.mit.edu)
+
+ * sort.c (compare): If -s given, leave lines in their original order.
+ (main): Recognize -s.
+ (usage): Document -s.
+ From Mike Haertel.
+
+Tue Feb 18 20:29:45 1992 Randall Smith (randy at geech.gnu.ai.mit.edu)
+
+ * sort.c (sort): Check for complete parsing of buffer into
+ lines before nixing temp files.
+
+Mon Feb 17 10:35:58 1992 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * sum.c (sysv_sum_file): Use %lu instead of %u to print a
+ long. Not that it matters for GNU . . .
+
+ * tr.c (unquote, make_printable_str): Use \007 instead of ANSI \a.
+ (append_normal_char, append_range, append_char_class,
+ append_repeated_char, append_equiv_class, spec_init):
+ Initialize `next' field of new `struct List_element'.
+ From rommel@informatik.tu-muenchen.de (Kai-Uwe Rommel).
+
+Sat Feb 8 17:16:49 1992 David J. MacKenzie (djm at apple-gunkies.gnu.ai.mit.edu)
+
+ * join.c (get_line): Renamed from getline to avoid GNU libc conflict.
+
+Sun Feb 2 21:22:01 1992 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * Version 1.2.
+
+ * nl.c: Support multiple files and "-" for stdin.
+ (main): Check for read and write errors.
+ (nl_file): New function.
+
+Wed Jan 29 10:09:10 1992 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * tr.c (main): -t option was called -b in getopt spec.
+ (validate): Don't warn that set1 is longer than set2.
+
+ * tr.c: Rename --sysv-string2-truncate to --truncate-string1.
+
+Fri Jan 17 16:29:05 1992 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * nl.c: New program from bin-src.
+
+ * nl.c (main): Use a struct linebuffer for line_buf.
+ (process_file): Use readline instead of fgets, to preserve NULs.
+ (check_section): Use memcmp instead of strncmp.
+ (proc_text): Print line_buf with fwrite instead of printf.
+
+ * nl.c (main): Usage message if too many args given. Check
+ for error in closing input file. Lengths of section delimiter
+ strings were 1 too large. Take separator_str into account in
+ length of print_no_line_fmt.
+ (build_print_fmt): Allocate space for print_fmt, in case
+ separator_str is long.
+ (proc_text): A blank line is one that contains nothing, not
+ even nonprinting characters.
+
+Fri Jan 17 01:04:22 1992 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * All programs: Document `--' instead of `+' to introduce
+ long-named options, in usage messages.
+
+ * sum.c (bsd_sum_file): Renamed from sum_file.
+ (sysv_sum_file): New function.
+ (main): Recognize an option to select between the algorithms.
+
+Sun Jan 5 17:41:18 1992 Jim Meyering (meyering at churchy.gnu.ai.mit.edu)
+
+ * pr.c (close_file, print_page): Fixed bug that had the command
+ yes |head |pr -t printing "yyyyyyyyyy".
+ * (print_page): Fixed bug that had pr -3 -a printing two too few
+ trailer lines per page.
+ * (main): Added restriction that -a and -m are incompatible.
+ Although the POSIX spec doesn't explicitly say they shouldn't
+ be used together, it says -a modifies the -column option and
+ that -column shouldn't be used with -m.
+
+Thu Jan 2 15:23:59 1992 David J. MacKenzie (djm at albert.gnu.ai.mit.edu)
+
+ * nl.c: Include regex.h after, not before, sys/types.h.
+
+Thu Jan 2 12:18:10 1992 Tom Lord (lord at geech.gnu.ai.mit.edu)
+
+ * sort.c (fillbuf): Return bytes buffered instead of bytes read.
+
+Fri Dec 27 22:53:36 1991 Jim Kingdon (kingdon at geech.gnu.ai.mit.edu)
+
+ * sort.c (LINEALLOC): New #define.
+ (struct lines): New field ``limit''.
+ (initlines): Set it from new arg ``limit''.
+ (sort, mergefps, checkfp): Pass new arg to initlines().
+ (findlines): Don't realloc past lines->limit.
+
+Tue Dec 24 01:24:03 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * tac.c, sort.c, csplit.c, system.h: Change POSIX ifdefs to
+ HAVE_UNISTD_H and _POSIX_VERSION.
+
+ * xwrite.c: Change POSIX ifdef to HAVE_UNISTD_H.
+
+Sat 14 Dec 1991 11:46:42 Jim Meyering (meyering at wombat)
+
+ * tr.c: Fixed an inaccurate comment on posix_pedantic.
+
+Thu 12 Dec 1991 21:15:20 Jim Meyering (meyering at hal)
+
+ * tr.c: Changed underscores to hyphens in long option name
+ "sysv_string2_truncate".
+
+Wed Dec 11 13:33:34 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * tac.c (main): Set obscure_syntax to tell re_search to
+ allocate memory for the group registers.
+
+Fri Dec 6 18:26:27 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * tac.c, sort.c, csplit.c [POSIX]: Use sigaction instead of
+ signal, which POSIX doesn't have.
+ * sort.c: Replace inthandler and huphandler with sighandler.
+ * csplit.c (main): Only handle signals if they were not being
+ ignored.
+
+ * tr.c: POSIX_ME_HARDER -> POSIXLY_CORRECT.
+
+Wed Dec 4 00:47:47 1991 Jim Meyering (meyering at wombat)
+
+ * tr.c (unquote): Reformat code so it doesn't go beyond column 80.
+ * tr.c (squeeze_filter): Comment a little on why it's better
+ to step through the input by two.
+ * tr.c (set_initialize): Write a comment describing the function.
+ * tr.c: Eliminated the variable `portability_warnings' and replaced
+ references to it by references to `!posix_pedantic'. One of the
+ uses of portability_warnings had been wrong.
+
+Tue Dec 3 14:03:35 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * tr.c: New program.
+
+Sun Dec 1 15:07:35 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * linebuffer.[ch] (freebuffer): New function (used by cron).
+
+Thu Oct 17 22:30:22 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * system.h, configure, Makefile.in: Don't include memory.h if
+ STDC_HEADERS, removing need for MEMORY_H_MISSING.
+
+Thu 17 Oct 1991 16:53:07 Jim Meyering (meyering at wombat)
+
+ * pr.c (print_page): REALLY fixed `extra newline at EOF' problem.
+ Somehow, part of my patch didn't make it last time.
+
+Sat Oct 12 12:04:47 1991 David J. MacKenzie (djm at churchy.gnu.ai.mit.edu)
+
+ * tail.c (pipe_lines, pipe_bytes): Initialize `first->next'.
+
+ * cmp.c (cmp): Print EOF message to stderr, not stdout, for
+ POSIX 1003.2.11.2.
+
+ * sort.c (xfwrite): fwrite never returns < 0, so check if
+ number written is number we asked to write.
+ (fillbuf, main): fread never returns < 0, so check ferror instead.
+ From Rainer Orth.
+
+Tue Oct 8 18:07:08 1991 Jim Meyering (meyering at churchy)
+
+ * pr.c (print_page): Really fixed `extra newline at EOF' problem.
+ * (store_columns): Fixed bug that caused `pr -b -2' to coredump
+ on files of certain lengths.
+
+Fri Oct 4 22:30:25 1991 Jim Meyering (meyering at churchy)
+
+ * pr.c (print_page): Fixed to not add single spurious newline
+ at EOF when using -t.
+
+Wed Oct 2 01:02:05 1991 David J. MacKenzie (djm at apple-gunkies)
+
+ * pr.c (print_page): Don't pad the page if -t given.
+
+ * csplit.c (load_buffer), sort.c (mergefps): Use bcopy, not memcpy.
+
+Thu Sep 26 12:35:00 1991 David J. MacKenzie (djm at churchy.gnu.ai.mit.edu)
+
+ * Version 1.1.
+
+ * configure, system.h: Include memory.h if it works.
+
+ * split.c: Allow `b' unit as well as `k' and `m'.
+
+ * head.c, tail.c: Replace -b +blocks option with specifying
+ units (b, k, or m) after the number.
+ (parse_unit): New function.
+
+ * fold.c (main): Check that -w arg is a number.
+
+ * cut.c: +delimiter takes an arg.
+
+Mon Sep 16 14:52:38 1991 David J. MacKenzie (djm at churchy.gnu.ai.mit.edu)
+
+ * pr.c (close_file): Don't close an already closed file.
+
+Thu Sep 12 00:14:43 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * memchr.c: New file.
+ * configure: Check if it's needed.
+
+ * csplit.c, gcsplit.1: New program.
+
+ * pr.c (cleanup): Only free buffers if they were allocated.
+
+ * sort.c [!USG && !STDC_HEADERS]: Declare memchr.
+
+Wed Sep 11 20:54:16 1991 Jim Meyering (meyering at churchy)
+
+ * pr.c: The following 3 bugs appeared (at least) when printing
+ a single file with the options `-3 -f'.
+ * (print_white_space): Single spaces were being replaced
+ with tabs.
+ * (print_page): Some lines were getting too much white space
+ at the beginning because spaces_not_printed wasn't being reset
+ to 0.
+ * (read_line): The single space between a truncated column
+ on its left and the column on its right was omitted. Fixed
+ so that previous value of input_position is restored before
+ returning FALSE.
+
+Sat Sep 7 03:22:18 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * configure: Only remove /etc from PATH when it's not part of
+ a larger name.
+
+Wed Sep 4 17:09:24 1991 David J. MacKenzie (djm at apple-gunkies)
+
+ * linebuffer.c (readline): Fix incorrect recalculation of `end'.
+
+ * head.c, tail.c: Replace `mode' variables and bitmasks with
+ separate variables for each option.
+
+Mon Sep 2 04:00:37 1991 David J. MacKenzie (djm at apple-gunkies)
+
+ * wc.c: New program.
+
+Sun Sep 1 01:18:38 1991 David J. MacKenzie (djm at apple-gunkies)
+
+ * fold.c (fold_file): Read in an int, not a char, for EOF
+ comparison.
+
+ * configure: Check whether st_blksize is missing.
+
+ * tac.c (save_stdin): Put copy of pipe input in TMPDIR if
+ defined, instead of /tmp.
+
+Thu Aug 29 14:48:15 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * xwrite.c [POSIX]: unistd.h might require sys/types.h.
+
+Wed Aug 28 11:57:39 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * paste.c (main): Consider -d "" to be like -d "\0",
+ for POSIX (if I read it right).
+
+ * sort.c, join.c: New programs.
+
+ * cut.c (set_field): Allow blanks as well as commas to
+ separate numbers in ranges.
+
+Sun Aug 25 19:57:40 1991 Jim Meyering (meyering at apple-gunkies)
+
+ * pr.c: Failure to open an input file is no longer a fatal error.
+ A message is printed for each failed open. When printing
+ in parallel, each failed open results in one fewer output column.
+ Added POSIX -r option to suppress the message.
+ * pr.c: Added variables: failed_opens, ignore_failed_opens.
+ These changes were based in part on work by David MacKenzie.
+
+Sat Aug 24 15:27:39 1991 Jim Meyering (meyering at pogo)
+
+ * pr.c: Complain if user gives both -m and -[0-9]+ options.
+
+Wed Aug 21 22:04:57 1991 David J. MacKenzie (djm at apple-gunkies)
+
+ * Version 1.0.
+
+Mon Aug 19 00:16:51 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * expand.c: Rename some variables.
+ (expand): Don't access value off end of array.
+ * unexpand.c: Rename some variables.
+ (unexpand): Don't access value off end of array.
+ Instead of copying tabs verbatim and flushing pending spaces
+ when one is reached, count them as the proper number of
+ pending spaces. Instead of changing tabs to single spaces if
+ the tabstop list is exhausted, print the rest of the line
+ unchanged (for POSIX).
+
+Sat Aug 17 01:49:41 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * cut.c (cut_file), paste.c (paste_parallel, paste_serial):
+ Clear EOF and error conditions on stdin so it can be reused.
+
+ * expand.c, unexpand.c (parse_tabstops): Allow blanks as well
+ as commas to separate tabstops, for POSIX.
+ * expand.c (expand), unexpand.c (unexpand): Don't line-buffer
+ the output; send it directly to stdout.
+ * unexpand.c (main): Make -t stupidly imply -a for POSIX.
+ (unexpand): If a tab stop list was given and we move past its end,
+ copy the rest of the line verbatim.
+
+ * split.c (convint): New function to allow 'm' and 'k' after
+ byte counts.
+ (main): Use it. Change -c option to -b for POSIX.
+
+Fri Aug 9 02:47:02 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * pr.c: Protect isdigit with isascii, if required.
+
+Tue Aug 6 21:42:25 1991 David J. MacKenzie (djm at wheat-chex)
+
+ Most of the following is from Paul Eggert:
+ * cat.c (main): If stdin is read, check close at end.
+ * cmp.c (main): Check for stdin being closed.
+ Check for close errors on stdin and stdout.
+ (cmp): Return a value instead of exiting.
+ * cut.c (cut_file): New function, from code in main.
+ Check for read errors.
+ (main): Check for close errors.
+ * expand.c, unexpand.c (main): Check for close errors.
+ (next_file): Go to next file if one can't be opened.
+ Check for close errors.
+ * head.c (main), tail.c (main): If stdin was read, check for
+ close errors.
+ * head.c (head_file), tail.c (tail_file): Check for close errors.
+ * paste.c (main, paste_parallel, paste_serial), tac.c (main):
+ Check for close errors. Close stdin if it was read.
+ * split.c (main): Check for close errors.
+
+ * configure, Makefile.in's: Support +srcdir option.
+ Make config.status. Fix up clean targets.
+
+Wed Jul 31 01:32:59 1991 David J. MacKenzie (djm at hal)
+
+ * linebuffer.h (struct linebuffer): Add a field to indicate
+ the number of valid chars in the line.
+ * linebuffer.c (initbuffer, readline): Fill it in.
+ * uniq.c, comm.c: Use it.
+
+ * pr.c (main): Check stdin and stdout fclose for errors.
+ (init_parameters): If there's no room for header and footer,
+ omit them rather than dying (for POSIX).
+ (init_header): Take a file descriptor as additional arg.
+ (init_fps): Change callers. Note when stdin is read.
+ (open_file): For filename "-" use stdin.
+ (close_file): Don't close stdin. Check close for errors.
+ (print_char, char_to_clump): Use isprint instead of explicit
+ comparisons.
+
+ * memcmp.c: New file (needed for comm).
+ * bcopy.c: New file (needed for fold).
+ * system.h: Don't define bcopy as memcpy.
+ * configure: Check for bcopy and memcmp.
+
+ * uniq.c (main): Use "-" instead of NULL to mean stdin or
+ stdout.
+ (check_file): Use "-" instead of NULL to mean stdin or stdout.
+ Check readline return instead of for NUL character to
+ detect eof.
+ Check fclose for errors.
+ (find_field): Use linebuffer length, not NULs, to detect end
+ of line.
+ (different): New function, replaces compare. Uses memcmp
+ instead of strncmp.
+ (writeline): Use fwrite instead of fputs so NULs are preserved.
+
+ * comm.c (compare_files): Return an error indication.
+ Don't take a filename of NULL to mean stdin.
+ Use memcmp instead of strcmp to allow for NULs.
+ Check fclose for errors.
+ (writeline): Use fwrite instead of fputs so NULs are preserved.
+
+ * sum.c (sum_file): Take an arg indicating whether to print
+ the filename, and don't take NULL meaning stdin. Set a flag
+ when we read stdin. Check fclose return for errors.
+ (main): If stdin was read, check fclose return for errors.
+ Use filename of "-" if no args given.
+
+Thu Jul 25 15:17:10 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * fold.c: Rewritten from scratch for POSIX.
+
+Wed Jul 24 01:55:41 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * split.c (line_bytes_split): Use xmalloc instead of alloca.
+ * system.h: Don't declare alloca.
+
+ * tac.c, tail.c: Use SEEK_ instead of L_ for lseek.
+ * system.h: Define SEEK_ macros if not defined.
+
+ * pr.c: Rename variable `truncate' to avoid library function conflict.
+
+Tue Jul 23 13:21:48 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * linebuffer.c, linebuffer.h: New files.
+ * comm.c, uniq.c (initbuffer, readline): Functions
+ removed (use versions in linebuffer.c).
+
+Mon Jul 22 13:23:53 1991 David J. MacKenzie (djm at wookumz.gnu.ai.mit.edu)
+
+ * sum.c (sumfile): Always print 5 digits for second number, too.
+ Interpret "-" to mean stdin.
+
+Sat Jul 20 14:24:40 1991 David J. MacKenzie (djm at bleen)
+
+ * uniq.c: Use isblank instead of isspace, to support POSIX.2.
+ * comm.c, pr.c, uniq.c (concat, fatal, error,
+ pfatal_with_name, xmalloc, xrealloc): Functions removed.
+
+Sat Jul 13 02:04:53 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
+
+ * nl.c: Add long-named options. Doc fixes.
+
+Sat Jul 6 02:19:09 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
+
+ * expand.c, unexpand.c [STDC_HEADERS]: Include stdlib.h.
+
+ * xwrite.c [POSIX]: Include unistd.h.
+ [STDC_HEADERS]: Don't declare errno.
+
+Sun Jun 30 23:35:16 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
+
+ * uniq.c: Add long-named options. Remove marginally useful -z
+ option (zero padded repeat counts).
+
+Thu Jun 27 16:31:45 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
+
+ * tail.c (tail_file), tac.c (save_stdin, tac_file), split.c
+ (cwrite), head.c (head_file), cat.c (main): Check close return
+ value for delayed error report due to NFS.
+
+Tue Jun 11 00:12:15 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
+
+ * cat.c: Replace "uchar" with "unsigned char", to avoid
+ problems with various systems' typedefs.
+
+Thu Jun 6 12:54:26 1991 David J. MacKenzie (djm at geech.gnu.ai.mit.edu)
+
+ * cat.c (cat): Interpret ENOTTY return from FIONREAD ioctl to mean
+ operation is unsupported, for HP-UX 7.0.
+
+Sun Apr 14 21:49:17 1991 Richard Stallman (rms at mole.gnu.ai.mit.edu)
+
+ * sum.c: Always print five digits for first number.
+
+Fri Mar 15 16:16:54 1991 David J. MacKenzie (djm at geech.ai.mit.edu)
+
+ * cat.c, cmp.c: Don't use fileno(); not needed.
+
+Thu Jan 10 02:16:55 1991 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * tac.c, tail.c: Change _POSIX_SOURCE to POSIX.
+
+Thu Dec 27 00:06:45 1990 David J. MacKenzie (djm at egypt)
+
+ * cut.c (cut_file_bytes, cut_file_fields): Make inbufp and
+ outbufp global.
+ (enlarge_line): Adjust inbufp and outbufp.
+
+Sun Sep 9 16:54:19 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * cat.c: Declare free returning void, not int, so it
+ doesn't bomb on Xenix.
+
+Mon Sep 3 22:23:57 1990 David J. MacKenzie (djm at coke)
+
+ * tac.c: Print error messages before calling cleanup, not after.
+
+Tue Aug 28 18:05:24 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * tac.c (cleanup): Return SIGTYPE, not int.
+
+Tue Aug 7 12:51:18 1990 David J. MacKenzie (djm at apple-gunkies)
+
+ * cut.c (main, usage): Add -b and -n options for POSIX.
+ (set_fields): Don't allow SPC or TAB as number separators.
+
+ * paste.c (paste_parallel): If open of any file fails, quit
+ (for POSIX).
+
+Mon Aug 6 22:14:13 1990 David J. MacKenzie (djm at apple-gunkies)
+
+ * pr.c: Add POSIX -F option (same as -f).
+
+ * uniq.c (check_file): Allow '-' to mean stdin or stdout.
+
+Mon Aug 6 14:43:30 1990 David J. MacKenzie (djm at pogo.ai.mit.edu)
+
+ * head.c, tail.c: Change `chars' to `bytes' globally.
+ (main, usage): Use POSIX.2 draft 10 option syntax.
+
+Sun Aug 5 11:51:12 1990 David J. MacKenzie (djm at pogo.ai.mit.edu)
+
+ * cat.c (main): Don't delay error messages, so they appear
+ where expected.
+ (main, simple_cat, cat): Make errors in input files nonfatal.
+
+Sat Aug 4 10:11:30 1990 David J. MacKenzie (djm at pogo.ai.mit.edu)
+
+ * cat.c: Remove -c option added for POSIX draft 9, since POSIX
+ draft 10 removed it.
+
+ * tac.c (tac_stdin): Use fstat instead of lseek to determine
+ whether stdin is seekable, because lseek silently fails on
+ some special files, like tty's.
+ tail.c (tail_chars, tail_lines): Use fstat instead of lseek;
+ don't turn off -f for non-regular files (assume the user knows
+ what he's doing; it might work for fifo's and sockets).
+
+ * paste.c (main): If no files given, use stdin.
+ Don't let collapse_escapes write on string constant (delim default).
+ (paste_parallel): Don't close stdin.
+
+ * cut.c (main): Use standard input for filename of "-".
+
+ * comm.c (compare_files): Allow '-' to mean stdin.
+
+Fri Aug 3 13:38:28 1990 David J. MacKenzie (djm at pogo.ai.mit.edu)
+
+ * cut.c (enlarge_line): Take an arg giving the required amount
+ of space. Change callers.
+ (main): Don't allow -t'<TAB>' without -f.
+ Make `delim' unsigned to fix sign extension problem in comparison.
+
+Tue Jul 17 12:36:11 EDT 1990 Jay Fenlason (hack@ai.mit.edu)
+
+ * pr.c: Deleted excess whitespace from ends of lines.
+ Modified to work with current version of getopt, which
+ returns 1 instead of 0 for non-options.
+ Reversed the meaning of the -f option, to be compatible
+ with real pr.
+
+Sun Jul 8 00:39:31 1990 David J. MacKenzie (djm at apple-gunkies)
+
+ * cmp.c (main, usage): Rename -L option to -c and don't have
+ it imply -l.
+ (printc): Take an arg to specify number of chars to pad to,
+ for column alignment.
+ (cmp): Respect flag_print_chars in default output format.
+ Align columns for cmp -cl.
+
+Sat Jul 7 17:23:30 1990 David J. MacKenzie (djm at apple-gunkies)
+
+ * cmp.c: For +show-chars, have getopt return 'L' so
+ `flag_print_chars' gets set.
+
+Fri Jun 29 01:04:19 1990 David J. MacKenzie (djm at apple-gunkies)
+
+ * tac.c (main): Initialize fastmap and translate fields of
+ regex before compiling it.
+
+Fri Jun 22 00:38:20 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * tac.c: Change +regexp to +regex for consistency with GNU find.
+
+Wed Jun 20 01:46:09 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * cat.c (cat): If FIONREAD is available, only use it if it is
+ supported by the filesystem that the file is on.
+
+Sun Jun 3 20:26:19 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * cat.c (main): Add a variable to control whether the check
+ for input file == output file is made, because no values of
+ st_dev and st_ino should be assumed to be available for this
+ purpose. Only do the check for regular files.
+
+ * tac.c: Use bcopy instead of memcpy.
+
+Thu May 31 00:55:36 1990 David J. MacKenzie (djm at apple-gunkies)
+
+ * head.c: Use longs instead of ints for file offsets, for 16
+ bit machines.
+
+Tue May 22 00:56:51 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * cmp.c: Change some ints to longs for 16 bit machines.
+ (bcmp_cnt): Make char-pointer counting slightly simpler.
+
+Sat May 12 01:16:42 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * cat.c (main): Allow input file to be output file for devices
+ (ttys, etc.). Check whether input file is output file when
+ reading standard input. Print any error messages for standard
+ input.
+
+ * cmp.c (bcmp_cnt): Handle int comparisons correctly on 16 bit
+ machines as well as 32 bit ones.
+ * cmp.c, tail.c: Use longs instead of ints for file offsets.
+
+Fri May 11 02:11:03 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * cmp.c: Fix some exit statuses for POSIX.
+
+Tue May 8 03:41:42 1990 David J. MacKenzie (djm at abyss)
+
+ * tac.c: Use regular expressions as the record boundaries.
+ Give better error messages.
+ Reformat code and make it more readable.
+ (main): Use getopt_long to parse options.
+ (tac_stdin): Do not make a temporary file if standard input
+ is a file.
+ (tac_file): New function.
+ (tac): Take an open file desc as an arg.
+ (output): Rewrite to use its own efficient buffering.
+ (xmalloc, xrealloc, xwrite): New functions.
+
+Sun Apr 8 20:33:20 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * head.c, tail.c: Use `error' instead of `fatal_perror' and
+ `nonfatal_perror'. Remove some unnecessary info from messages.
+
+Wed Mar 21 09:30:18 1990 David J. MacKenzie (djm at pogo.ai.mit.edu)
+
+ * comm.c (main): Pass the list of files to compare_files as a
+ char ** instead of a char *.
+ (compare_files): Make arg a char **.
+
+ * uniq.c: Declare some functions as void.
+ Change global vars `countmode' and `mode' from ints to enums.
+ (main): Use getopt to parse options and support POSIX options.
+ Don't use integer_arg to parse numbers, since `-#' can't be
+ parsed that way using getopt.
+ (find_field): Use isspace for finding fields boundaries.
+
+Tue Mar 20 14:28:25 1990 David J. MacKenzie (djm at pogo.ai.mit.edu)
+
+ * comm.c (main): Call usage if given bad option or wrong
+ number of args. Exit with 0 status normally.
+ (usage): New function.
+ Declare some other functions as void.
+
+Wed Mar 14 10:48:40 1990 David J. MacKenzie (djm at rice-chex)
+
+ * cmp.c (main, cmp, usage): Replace -q +quick option with -L
+ +show-chars option to add ASCII representation of bytes to -l format.
+
+Tue Mar 13 00:50:14 1990 David J. MacKenzie (djm at rice-chex)
+
+ * cmp.c (cmp): Change EOF message for POSIX compatibility.
+ For -l format, clear bits > FF.
+
+Mon Mar 5 17:21:00 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * tail.c: Move global `errors' into main instead of having
+ nonfatal_perror set it.
+ (tail, tail_chars, tail_file, tail_lines, pipe_chars, pipe_lines):
+ Return an error status.
+ (file_lines, start_chars, start_lines): Reverse the meaning of
+ the return value.
+ (tail_lines, tail_chars): Account for that reversal.
+
+Mon Mar 5 00:34:36 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * head.c: Move global `errors' into main and have the various
+ functions return an error status instead of setting it in
+ nonfatal_perror.
+
+Sat Mar 3 11:27:27 1990 Torbjorn Granlund (tege at echnaton)
+
+ * cmp.c (cmp): Call function bcmp_cnt for flag == 0 (i.e. no
+ options specified), to compare the two blocks and count
+ newlines simultaneously.
+ * cmp.c New function: bcmp_cnt.
+
+ * cmp.c (main): Test if output is redirected to /dev/null, and
+ assume `-s' if this is so.
+
+Tue Feb 20 17:09:19 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * cat.c: Change `argbad' from a char to a short, so it will
+ work on machines with unsigned chars.
+
+Sat Feb 10 02:16:40 1990 David J. MacKenzie (djm at albert.ai.mit.edu)
+
+ * cmp.c (cmp): Rename `r' to `first_diff', and `x' to `smaller'.
+ Remove unnecessary variable `c1'. If -l was given, increase
+ `char_number' by the number of bytes read, after producing output,
+ rather than by the offset of the first differing bytes, before
+ producing output.
+ Replace if-else-if constructions with case statements for clarity.
+ (bcmp2): Rename `n' to `nread'.
+
+Wed Dec 20 01:32:06 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
+
+ * nl.c (proc_text): Use re_search instead of re_match.
+
+Tue Dec 19 01:26:34 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
+
+ * nl.c: Indent. Un-nest statements. Use GNU regexp functions
+ instead of System V ones. Move function declarations together.
+ (quit): Remove useless function.
+ (program_name): New variable for error messages.
+ (main): Use perror in error message.
+ (xmalloc): New function to replace myalloc.
+ (myalloc): Function removed.
+ Global: use program_name and xmalloc.
+
+Sun Dec 17 00:36:36 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
+
+ * uniq.c: Declare some functions.
+ (main): Initialize infile and outfile. Call usage if given
+ invalid args. Normally exit with 0 status instead of garbage.
+ (usage): New function to print usage message and exit.
+ (check_file): Remove unused variable.
+ (readline): Compare against EOF, not < 0.
+ (xmalloc, xrealloc): Return char *, not int.
+ Ok to return 0 if 0 bytes requested.
+ (lb1, lb2): Remove unused global vars.
+ (concat): Remove unused function.
+
+Sat Dec 16 15:15:50 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu)
+
+ * comm.c: Remove unused global variables lb1, lb2.
+ (main): Remove unneeded variable.
+ (compare_files): Remove unused arg.
+ (readline): un-nest assignment. Test against EOF instead of < 0.
+ (error): Print to stderr, not stdout.
+ (xmalloc, xrealloc): Return char * instead of int.
+ Returning 0 is ok if 0 bytes requested (ANSI C).
+
+
+Local Variables:
+mode: indented-text
+left-margin: 8
+version-control: never
+End:
diff --git a/src/cat.c b/src/cat.c
new file mode 100644
index 000000000..34c438491
--- /dev/null
+++ b/src/cat.c
@@ -0,0 +1,660 @@
+/* cat -- concatenate files and print on the standard output.
+ Copyright (C) 1988, 1990, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Differences from the Unix cat:
+ * Always unbuffered, -u is ignored.
+ * 100 times faster with -v -u.
+ * 20 times faster with -v.
+
+ By tege@sics.se, Torbjorn Granlund, advised by rms, Richard Stallman. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#ifndef _POSIX_SOURCE
+#include <sys/ioctl.h>
+#endif
+#include "system.h"
+
+#define max(h,i) ((h) > (i) ? (h) : (i))
+
+char *stpcpy ();
+char *xmalloc ();
+void cat ();
+void error ();
+void next_line_num ();
+void simple_cat ();
+
+/* Name under which this program was invoked. */
+char *program_name;
+
+/* Name of input file. May be "-". */
+char *infile;
+
+/* Descriptor on which input file is open. */
+int input_desc;
+
+/* Descriptor on which output file is open. Always is 1. */
+int output_desc;
+
+/* Buffer for line numbers. */
+char line_buf[13] =
+{' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0', '\t', '\0'};
+
+/* Position in `line_buf' where printing starts. This will not change
+ unless the number of lines is more than 999999. */
+char *line_num_print = line_buf + 5;
+
+/* Position of the first digit in `line_buf'. */
+char *line_num_start = line_buf + 10;
+
+/* Position of the last digit in `line_buf'. */
+char *line_num_end = line_buf + 10;
+
+/* Preserves the `cat' function's local `newlines' between invocations. */
+int newlines2 = 0;
+
+/* Count of non-fatal error conditions. */
+int exit_stat = 0;
+
+void
+usage (reason) /* Print REASON (if any) and the usage summary on stderr, then exit. */
+ char *reason; /* Message printed before the usage text, or NULL for none. */
+{
+ if (reason != NULL)
+ fprintf (stderr, "%s: %s\n", program_name, reason);
+
+ fprintf (stderr, "\
+Usage: %s [-benstuvAET] [--number] [--number-nonblank] [--squeeze-blank]\n\
+ [--show-nonprinting] [--show-ends] [--show-tabs] [--show-all]\n\
+ [file...]\n",
+ program_name);
+
+ exit (2); /* Usage errors always exit with status 2. */
+}
+
+
+void
+main (argc, argv) /* Parse options, then copy each operand (or stdin) to stdout. */
+ int argc;
+ char *argv[];
+{
+ /* Optimal size of i/o operations of output. */
+ int outsize;
+
+ /* Optimal size of i/o operations of input. */
+ int insize;
+
+ /* Pointer to the input buffer. */
+ unsigned char *inbuf;
+
+ /* Pointer to the output buffer. */
+ unsigned char *outbuf;
+
+ int c; /* Option character returned by getopt_long. */
+
+ /* Index in argv to processed argument. */
+ int argind;
+
+ /* Device number of the output (file or whatever). */
+ int out_dev;
+
+ /* I-node number of the output. */
+ int out_ino;
+
+ /* Nonzero if the output file should not be the same as any input file. */
+ int check_redirection = 1;
+
+ /* Nonzero if we have ever read standard input. */
+ int have_read_stdin = 0;
+
+ struct stat stat_buf;
+
+ /* Variables that are set according to the specified options. */
+ int numbers = 0;
+ int numbers_at_empty_lines = 1;
+ int squeeze_empty_lines = 0;
+ int mark_line_ends = 0;
+ int quote = 0;
+ int output_tabs = 1;
+ int options = 0; /* Number of options seen, not counting -u. */
+
+ static struct option long_options[] =
+ {
+ {"number-nonblank", 0, NULL, 'b'},
+ {"number", 0, NULL, 'n'},
+ {"squeeze-blank", 0, NULL, 's'},
+ {"show-nonprinting", 0, NULL, 'v'},
+ {"show-ends", 0, NULL, 'E'},
+ {"show-tabs", 0, NULL, 'T'},
+ {"show-all", 0, NULL, 'A'},
+ {NULL, 0, NULL, 0}
+ };
+
+ program_name = argv[0];
+
+ /* Parse command line options. */
+
+ while ((c = getopt_long (argc, argv, "benstuvAET", long_options, (int *) 0))
+ != EOF)
+ {
+ options++;
+ switch (c)
+ {
+ case 'b':
+ numbers = 1;
+ numbers_at_empty_lines = 0;
+ break;
+
+ case 'e':
+ mark_line_ends = 1;
+ quote = 1;
+ break;
+
+ case 'n':
+ numbers = 1;
+ break;
+
+ case 's':
+ squeeze_empty_lines = 1;
+ break;
+
+ case 't':
+ output_tabs = 0;
+ quote = 1;
+ break;
+
+ case 'u':
+ /* We provide the -u feature unconditionally, so -u alone must
+ still select `simple_cat'; undo the increment made above. */
+ options--;
+ break;
+
+ case 'v':
+ quote = 1;
+ break;
+
+ case 'A':
+ quote = 1;
+ mark_line_ends = 1;
+ output_tabs = 0;
+ break;
+
+ case 'E':
+ mark_line_ends = 1;
+ break;
+
+ case 'T':
+ output_tabs = 0;
+ break;
+
+ default:
+ usage ((char *) 0);
+ }
+ }
+
+ output_desc = 1;
+
+ /* Get device, i-node number, and optimal blocksize of output. */
+
+ if (fstat (output_desc, &stat_buf) < 0)
+ error (1, errno, "standard output");
+
+ outsize = ST_BLKSIZE (stat_buf);
+ /* Input file can be output file for non-regular files.
+ fstat on pipes returns S_IFSOCK on some systems, S_IFIFO
+ on others, so the checking should not be done for those types,
+ and to allow things like cat < /dev/tty > /dev/tty, checking
+ is not done for device files either. */
+
+ if (S_ISREG (stat_buf.st_mode))
+ {
+ out_dev = stat_buf.st_dev;
+ out_ino = stat_buf.st_ino;
+ }
+ else
+ check_redirection = 0; /* out_dev and out_ino stay unset and are never read. */
+
+ /* Check if any of the input files are the same as the output file. */
+
+ /* Main loop. */
+
+ infile = "-"; /* Used when there are no file operands at all. */
+ argind = optind;
+
+ do
+ {
+ if (argind < argc)
+ infile = argv[argind];
+
+ if (infile[0] == '-' && infile[1] == 0)
+ {
+ have_read_stdin = 1;
+ input_desc = 0;
+ }
+ else
+ {
+ input_desc = open (infile, O_RDONLY);
+ if (input_desc < 0)
+ {
+ error (0, errno, "%s", infile);
+ exit_stat = 1;
+ continue; /* Jumps to the `++argind < argc' test below. */
+ }
+ }
+
+ if (fstat (input_desc, &stat_buf) < 0)
+ {
+ error (0, errno, "%s", infile);
+ exit_stat = 1;
+ goto contin;
+ }
+ insize = ST_BLKSIZE (stat_buf);
+
+ /* Compare the device and i-node numbers of this input file with
+ the corresponding values of the (output file associated with)
+ stdout, and skip this input file if they coincide. Input
+ files cannot be redirected to themselves. */
+
+ if (check_redirection
+ && stat_buf.st_dev == out_dev && stat_buf.st_ino == out_ino)
+ {
+ error (0, 0, "%s: input file is output file", infile);
+ exit_stat = 1;
+ goto contin;
+ }
+
+ /* Select which version of `cat' to use. If any options (more than -u)
+ were specified, use `cat', otherwise use `simple_cat'. */
+
+ if (options == 0)
+ {
+ insize = max (insize, outsize);
+ inbuf = (unsigned char *) xmalloc (insize);
+
+ simple_cat (inbuf, insize);
+ }
+ else
+ {
+ inbuf = (unsigned char *) xmalloc (insize + 1); /* +1 for the newline sentinel `cat' stores after the data. */
+
+ /* Why are (OUTSIZE - 1 + INSIZE * 4 + 13) bytes allocated for
+ the output buffer?
+
+ A test whether output needs to be written is done when the input
+ buffer empties or when a newline appears in the input. After
+ output is written, at most (OUTSIZE - 1) bytes will remain in the
+ buffer. Now INSIZE bytes of input are read. Each input character
+ may grow by a factor of 4 (by the prepending of M-^). If all
+ characters do, and no newlines appear in this block of input, we
+ will have at most (OUTSIZE - 1 + INSIZE * 4) bytes in the buffer. If
+ the last character in the preceding block of input was a
+ newline, a line number may be written (according to the given
+ options) as the first thing in the output buffer. (Done after the
+ new input is read, but before processing of the input begins.) A
+ line number seldom requires more than 13 positions. */
+
+ outbuf = (unsigned char *) xmalloc (outsize - 1 + insize * 4 + 13);
+
+ cat (inbuf, insize, outbuf, outsize, quote,
+ output_tabs, numbers, numbers_at_empty_lines, mark_line_ends,
+ squeeze_empty_lines);
+
+ free (outbuf);
+ }
+
+ free (inbuf);
+
+ contin: /* Standard input is not closed here; it is closed once, after the loop. */
+ if (strcmp (infile, "-") && close (input_desc) < 0)
+ {
+ error (0, errno, "%s", infile);
+ exit_stat = 1;
+ }
+ }
+ while (++argind < argc);
+
+ if (have_read_stdin && close (0) < 0)
+ error (1, errno, "-");
+ if (close (1) < 0)
+ error (1, errno, "write error");
+
+ exit (exit_stat);
+}
+
+/* Plain cat, used when no option other than -u was given. Copies the
+ file behind `input_desc' to the file behind `output_desc'. */
+
+void
+simple_cat (buf, bufsize)
+ /* Pointer to the buffer, used by reads and writes. */
+ unsigned char *buf;
+
+ /* Number of characters preferably read or written by each read and write
+ call. */
+ int bufsize;
+{
+ /* Actual number of characters read, and therefore written. */
+ int n_read;
+
+ /* Loop until the end of the file. */
+
+ for (;;)
+ {
+ /* Read a block of input. */
+
+ n_read = read (input_desc, buf, bufsize);
+ if (n_read < 0)
+ {
+ error (0, errno, "%s", infile);
+ exit_stat = 1;
+ return; /* Give up on this file only; exit_stat records the failure. */
+ }
+
+ /* End of this file? */
+
+ if (n_read == 0)
+ break;
+
+ /* Write this block out. A short write counts as an error too. */
+
+ if (write (output_desc, buf, n_read) != n_read)
+ error (1, errno, "write error");
+ }
+}
+
+/* Cat the file behind INPUT_DESC to the file behind OUTPUT_DESC.
+ Called if any option more than -u was specified.
+
+ A newline character is always put at the end of the buffer, to make
+ an explicit test for buffer end unnecessary. */
+
+void
+cat (inbuf, insize, outbuf, outsize, quote,
+ output_tabs, numbers, numbers_at_empty_lines,
+ mark_line_ends, squeeze_empty_lines)
+
+ /* Pointer to the beginning of the input buffer. */
+ unsigned char *inbuf;
+
+ /* Number of characters read in each read call. */
+ int insize;
+
+ /* Pointer to the beginning of the output buffer. */
+ unsigned char *outbuf;
+
+ /* Number of characters written by each write call. */
+ int outsize;
+
+ /* Variables that have values according to the specified options. */
+ int quote;
+ int output_tabs;
+ int numbers;
+ int numbers_at_empty_lines;
+ int mark_line_ends;
+ int squeeze_empty_lines;
+{
+ /* Last character read from the input buffer. */
+ unsigned char ch;
+
+ /* Pointer to the next character in the input buffer. */
+ unsigned char *bpin;
+
+ /* Pointer to the first non-valid byte in the input buffer, i.e. the
+ current end of the buffer. */
+ unsigned char *eob;
+
+ /* Pointer to the position where the next character shall be written. */
+ unsigned char *bpout;
+
+ /* Number of characters read by the last read call. */
+ int n_read;
+
+ /* Determines how many consecutive newlines there have been in the
+ input. 0 newlines makes NEWLINES -1, 1 newline makes NEWLINES 0,
+ etc. Initially 0 to indicate that we are at the beginning of a
+ new line. The "state" of the procedure is determined by
+ NEWLINES, which is saved in `newlines2' on every return. */
+ int newlines = newlines2;
+
+#ifdef FIONREAD
+ /* If nonzero, use the FIONREAD ioctl, as an optimization.
+ (On Ultrix, it is not supported on NFS filesystems.) */
+ int use_fionread = 1;
+#endif
+
+ /* The inbuf pointers are initialized so that BPIN > EOB, and thereby input
+ is read immediately. */
+
+ eob = inbuf;
+ bpin = eob + 1;
+
+ bpout = outbuf;
+
+ for (;;)
+ {
+ do
+ {
+ /* Write if there are at least OUTSIZE bytes in OUTBUF. */
+
+ if (bpout - outbuf >= outsize)
+ {
+ unsigned char *wp = outbuf;
+ do
+ {
+ if (write (output_desc, wp, outsize) != outsize)
+ error (1, errno, "write error");
+ wp += outsize;
+ }
+ while (bpout - wp >= outsize);
+
+ /* Move the remaining bytes to the beginning of the
+ buffer. */
+
+ bcopy (wp, outbuf, bpout - wp);
+ bpout = outbuf + (bpout - wp);
+ }
+
+ /* Is INBUF empty? */
+
+ if (bpin > eob)
+ {
+#ifdef FIONREAD
+ int n_to_read = 0;
+
+ /* Is there any input to read immediately?
+ If not, we are about to wait,
+ so write all buffered output before waiting. */
+
+ if (use_fionread
+ && ioctl (input_desc, FIONREAD, &n_to_read) < 0)
+ {
+ /* Ultrix returns EOPNOTSUPP on NFS;
+ HP-UX returns ENOTTY on pipes. */
+ if (errno == EOPNOTSUPP || errno == ENOTTY)
+ use_fionread = 0;
+ else
+ {
+ error (0, errno, "cannot do ioctl on `%s'", infile);
+ exit_stat = 1;
+ newlines2 = newlines;
+ return;
+ }
+ }
+ if (n_to_read == 0)
+#endif
+ {
+ int n_write = bpout - outbuf;
+
+ if (write (output_desc, outbuf, n_write) != n_write)
+ error (1, errno, "write error");
+ bpout = outbuf;
+ }
+
+ /* Read more input into INBUF. */
+
+ n_read = read (input_desc, inbuf, insize);
+ if (n_read < 0)
+ {
+ error (0, errno, "%s", infile);
+ exit_stat = 1;
+ newlines2 = newlines;
+ return;
+ }
+ if (n_read == 0)
+ {
+ newlines2 = newlines;
+ return;
+ }
+
+ /* Update the pointers and insert a sentinel at the buffer
+ end. */
+
+ bpin = inbuf;
+ eob = bpin + n_read;
+ *eob = '\n';
+ }
+ else
+ {
+ /* It was a real (not a sentinel) newline. */
+
+ /* Was the last line empty?
+ (i.e. have two or more consecutive newlines been read?) */
+
+ if (++newlines > 0)
+ {
+ /* Are multiple adjacent empty lines to be substituted by
+ single ditto (-s), and this was the second empty line? */
+
+ if (squeeze_empty_lines && newlines >= 2)
+ {
+ ch = *bpin++;
+ continue; /* Drops this newline; goes to the `ch == '\n'' test. */
+ }
+
+ /* Are line numbers to be written at empty lines (-n)? */
+
+ if (numbers && numbers_at_empty_lines)
+ {
+ next_line_num ();
+ bpout = (unsigned char *) stpcpy (bpout, line_num_print);
+ }
+ }
+
+ /* Output a `$' to mark the line end if requested (-e, -E, or -A). */
+
+ if (mark_line_ends)
+ *bpout++ = '$';
+
+ /* Output the newline. */
+
+ *bpout++ = '\n';
+ }
+ ch = *bpin++;
+ }
+ while (ch == '\n');
+
+ /* Are we at the beginning of a line, and line numbers are requested? */
+
+ if (newlines >= 0 && numbers)
+ {
+ next_line_num ();
+ bpout = (unsigned char *) stpcpy (bpout, line_num_print);
+ }
+
+ /* Here CH cannot contain a newline character. */
+
+ /* The loops below continue until a newline character is found,
+ which means that the buffer is empty or that a proper newline
+ has been found. */
+
+ /* If quoting, i.e. at least one of -v, -e, -t, or -A specified,
+ scan for chars that need conversion. */
+ if (quote)
+ for (;;)
+ {
+ if (ch >= 32)
+ {
+ if (ch < 127)
+ *bpout++ = ch;
+ else if (ch == 127)
+ *bpout++ = '^',
+ *bpout++ = '?';
+ else
+ {
+ *bpout++ = 'M',
+ *bpout++ = '-';
+ if (ch >= 128 + 32)
+ if (ch < 128 + 127)
+ *bpout++ = ch - 128;
+ else
+ *bpout++ = '^',
+ *bpout++ = '?';
+ else
+ *bpout++ = '^',
+ *bpout++ = ch - 128 + 64;
+ }
+ }
+ else if (ch == '\t' && output_tabs)
+ *bpout++ = '\t';
+ else if (ch == '\n')
+ {
+ newlines = -1;
+ break;
+ }
+ else
+ *bpout++ = '^',
+ *bpout++ = ch + 64;
+
+ ch = *bpin++;
+ }
+ else
+ /* Not quoting: none of -v, -e, -t, or -A specified. */
+ for (;;)
+ {
+ if (ch == '\t' && !output_tabs)
+ *bpout++ = '^',
+ *bpout++ = ch + 64;
+ else if (ch != '\n')
+ *bpout++ = ch;
+ else
+ {
+ newlines = -1;
+ break;
+ }
+
+ ch = *bpin++;
+ }
+ }
+}
+
+/* Compute the next line number by incrementing the decimal digits in place. */
+
+void
+next_line_num ()
+{
+ char *endp = line_num_end; /* Start at the least significant digit. */
+ do
+ {
+ if ((*endp)++ < '9')
+ return;
+ *endp-- = '0'; /* Digit was '9': wrap to '0' and carry to the left. */
+ }
+ while (endp >= line_num_start);
+ *--line_num_start = '1'; /* Carry out of the first digit. NOTE(review): no lower bound is checked here. */
+ if (line_num_start < line_num_print)
+ line_num_print--; /* The number outgrew the printed field; widen it. */
+}
diff --git a/src/cksum.c b/src/cksum.c
new file mode 100644
index 000000000..df9c3130b
--- /dev/null
+++ b/src/cksum.c
@@ -0,0 +1,274 @@
+/* cksum -- calculate and print POSIX.2 checksums and sizes of files
+ Copyright (C) 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Q. Frank Xia, qx@math.columbia.edu.
+ Cosmetic changes and reorganization by David MacKenzie, djm@gnu.ai.mit.edu.
+
+ Usage: cksum [file...]
+
+ The code segment between "#ifdef CRCTAB" and "#else" is the code
+ which calculates the "crctab". It is included for those who want to
+ verify the correctness of the "crctab". To recreate the "crctab",
+ do the following:
+
+ cc -DCRCTAB -o crctab cksum.c
+ crctab > crctab.h
+
+ As Bruce Evans pointed out to me, the crctab in the sample C code
+ in 4.9.10 Rationale of P1003.2/D11.2 is represented in reversed order.
+ Namely, 0x01 is represented as 0x80, 0x02 is represented as 0x40, etc.
+ The generating polynomial is crctab[0x80]=0xedb88320 instead of
+ crctab[1]=0x04C11DB7. But the code works only for a non-reverse order
+ crctab. Therefore, the sample implementation is wrong.
+
+ This software is compatible with neither the System V nor the BSD
+ `sum' program. It is supposed to conform to P1003.2/D11.2,
+ except foreign language interface (4.9.5.3 of P1003.2/D11.2) support.
+ Any inconsistency with the standard except 4.9.5.3 is a bug. */
+
+#ifdef CRCTAB
+
+#include <stdio.h>
+
+#define BIT(x) ( (unsigned long)1 << (x) )
+#define SBIT BIT(31)
+
+/* The generating polynomial is
+
+ G(X) = X^32 + X^26 + X^23 + X^22 + X^16 + X^12 + X^11 + X^10
+ + X^8 + X^7 + X^5 + X^4 + X^2 + X + 1
+
+ The i bit in GEN is set if X^i is a summand of G(X) except X^32. */
+
+#define GEN (BIT(26)|BIT(23)|BIT(22)|BIT(16)|BIT(12)|BIT(11)|BIT(10)\
+ |BIT(8) |BIT(7) |BIT(5) |BIT(4) |BIT(2) |BIT(1) |BIT(0))
+
+unsigned long r[8];
+
+void
+fill_r () /* Fill the table r[0..7] used by `remainder'. */
+{
+ int i;
+
+ r[0] = GEN;
+ for (i = 1; i < 8; i++) /* Each entry is the previous one shifted left, XORed with r[0] if bit 31 was set. */
+ r[i] = (r[i - 1] & SBIT) ? (r[i - 1] << 1) ^ r[0] : r[i - 1] << 1;
+}
+
+unsigned long
+remainder (m) /* Return the XOR of r[i] over every bit i (0..7) set in M. */
+ int m;
+{
+ unsigned long rem = 0;
+ int i;
+
+ for (i = 0; i < 8; i++)
+ if (BIT (i) & m)
+ rem = rem ^ r[i];
+
+ return rem & 0xFFFFFFFF; /* Make it run on 64-bit machine. */
+}
+
+void
+main () /* Print a C definition of the 256-entry crctab on standard output. */
+{
+ int i;
+
+ fill_r ();
+ printf ("unsigned long crctab[256] = {\n 0x0");
+ for (i = 0; i < 51; i++) /* Entry 0 is printed above; 51 rows of 5 give entries 1..255. */
+ { /* `remainder' returns unsigned long, so the conversions must be %lX, not %X. */
+ printf (",\n 0x%08lX, 0x%08lX, 0x%08lX, 0x%08lX, 0x%08lX",
+ remainder (i * 5 + 1), remainder (i * 5 + 2), remainder (i * 5 + 3),
+ remainder (i * 5 + 4), remainder (i * 5 + 5));
+ }
+ printf ("\n};\n");
+ exit (0);
+}
+
+#else /* !CRCTAB */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include "system.h"
+
+/* Number of bytes to read at once. */
+#define BUFLEN (1 << 16)
+
+unsigned long crctab[256] =
+{
+ 0x0,
+ 0x04C11DB7, 0x09823B6E, 0x0D4326D9, 0x130476DC, 0x17C56B6B,
+ 0x1A864DB2, 0x1E475005, 0x2608EDB8, 0x22C9F00F, 0x2F8AD6D6,
+ 0x2B4BCB61, 0x350C9B64, 0x31CD86D3, 0x3C8EA00A, 0x384FBDBD,
+ 0x4C11DB70, 0x48D0C6C7, 0x4593E01E, 0x4152FDA9, 0x5F15ADAC,
+ 0x5BD4B01B, 0x569796C2, 0x52568B75, 0x6A1936C8, 0x6ED82B7F,
+ 0x639B0DA6, 0x675A1011, 0x791D4014, 0x7DDC5DA3, 0x709F7B7A,
+ 0x745E66CD, 0x9823B6E0, 0x9CE2AB57, 0x91A18D8E, 0x95609039,
+ 0x8B27C03C, 0x8FE6DD8B, 0x82A5FB52, 0x8664E6E5, 0xBE2B5B58,
+ 0xBAEA46EF, 0xB7A96036, 0xB3687D81, 0xAD2F2D84, 0xA9EE3033,
+ 0xA4AD16EA, 0xA06C0B5D, 0xD4326D90, 0xD0F37027, 0xDDB056FE,
+ 0xD9714B49, 0xC7361B4C, 0xC3F706FB, 0xCEB42022, 0xCA753D95,
+ 0xF23A8028, 0xF6FB9D9F, 0xFBB8BB46, 0xFF79A6F1, 0xE13EF6F4,
+ 0xE5FFEB43, 0xE8BCCD9A, 0xEC7DD02D, 0x34867077, 0x30476DC0,
+ 0x3D044B19, 0x39C556AE, 0x278206AB, 0x23431B1C, 0x2E003DC5,
+ 0x2AC12072, 0x128E9DCF, 0x164F8078, 0x1B0CA6A1, 0x1FCDBB16,
+ 0x018AEB13, 0x054BF6A4, 0x0808D07D, 0x0CC9CDCA, 0x7897AB07,
+ 0x7C56B6B0, 0x71159069, 0x75D48DDE, 0x6B93DDDB, 0x6F52C06C,
+ 0x6211E6B5, 0x66D0FB02, 0x5E9F46BF, 0x5A5E5B08, 0x571D7DD1,
+ 0x53DC6066, 0x4D9B3063, 0x495A2DD4, 0x44190B0D, 0x40D816BA,
+ 0xACA5C697, 0xA864DB20, 0xA527FDF9, 0xA1E6E04E, 0xBFA1B04B,
+ 0xBB60ADFC, 0xB6238B25, 0xB2E29692, 0x8AAD2B2F, 0x8E6C3698,
+ 0x832F1041, 0x87EE0DF6, 0x99A95DF3, 0x9D684044, 0x902B669D,
+ 0x94EA7B2A, 0xE0B41DE7, 0xE4750050, 0xE9362689, 0xEDF73B3E,
+ 0xF3B06B3B, 0xF771768C, 0xFA325055, 0xFEF34DE2, 0xC6BCF05F,
+ 0xC27DEDE8, 0xCF3ECB31, 0xCBFFD686, 0xD5B88683, 0xD1799B34,
+ 0xDC3ABDED, 0xD8FBA05A, 0x690CE0EE, 0x6DCDFD59, 0x608EDB80,
+ 0x644FC637, 0x7A089632, 0x7EC98B85, 0x738AAD5C, 0x774BB0EB,
+ 0x4F040D56, 0x4BC510E1, 0x46863638, 0x42472B8F, 0x5C007B8A,
+ 0x58C1663D, 0x558240E4, 0x51435D53, 0x251D3B9E, 0x21DC2629,
+ 0x2C9F00F0, 0x285E1D47, 0x36194D42, 0x32D850F5, 0x3F9B762C,
+ 0x3B5A6B9B, 0x0315D626, 0x07D4CB91, 0x0A97ED48, 0x0E56F0FF,
+ 0x1011A0FA, 0x14D0BD4D, 0x19939B94, 0x1D528623, 0xF12F560E,
+ 0xF5EE4BB9, 0xF8AD6D60, 0xFC6C70D7, 0xE22B20D2, 0xE6EA3D65,
+ 0xEBA91BBC, 0xEF68060B, 0xD727BBB6, 0xD3E6A601, 0xDEA580D8,
+ 0xDA649D6F, 0xC423CD6A, 0xC0E2D0DD, 0xCDA1F604, 0xC960EBB3,
+ 0xBD3E8D7E, 0xB9FF90C9, 0xB4BCB610, 0xB07DABA7, 0xAE3AFBA2,
+ 0xAAFBE615, 0xA7B8C0CC, 0xA379DD7B, 0x9B3660C6, 0x9FF77D71,
+ 0x92B45BA8, 0x9675461F, 0x8832161A, 0x8CF30BAD, 0x81B02D74,
+ 0x857130C3, 0x5D8A9099, 0x594B8D2E, 0x5408ABF7, 0x50C9B640,
+ 0x4E8EE645, 0x4A4FFBF2, 0x470CDD2B, 0x43CDC09C, 0x7B827D21,
+ 0x7F436096, 0x7200464F, 0x76C15BF8, 0x68860BFD, 0x6C47164A,
+ 0x61043093, 0x65C52D24, 0x119B4BE9, 0x155A565E, 0x18197087,
+ 0x1CD86D30, 0x029F3D35, 0x065E2082, 0x0B1D065B, 0x0FDC1BEC,
+ 0x3793A651, 0x3352BBE6, 0x3E119D3F, 0x3AD08088, 0x2497D08D,
+ 0x2056CD3A, 0x2D15EBE3, 0x29D4F654, 0xC5A92679, 0xC1683BCE,
+ 0xCC2B1D17, 0xC8EA00A0, 0xD6AD50A5, 0xD26C4D12, 0xDF2F6BCB,
+ 0xDBEE767C, 0xE3A1CBC1, 0xE760D676, 0xEA23F0AF, 0xEEE2ED18,
+ 0xF0A5BD1D, 0xF464A0AA, 0xF9278673, 0xFDE69BC4, 0x89B8FD09,
+ 0x8D79E0BE, 0x803AC667, 0x84FBDBD0, 0x9ABC8BD5, 0x9E7D9662,
+ 0x933EB0BB, 0x97FFAD0C, 0xAFB010B1, 0xAB710D06, 0xA6322BDF,
+ 0xA2F33668, 0xBCB4666D, 0xB8757BDA, 0xB5365D03, 0xB1F740B4
+};
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Nonzero if any of the files read were the standard input. */
+int have_read_stdin;
+
+/* Calculate and print the checksum and length in bytes
+ of file FILE, or of the standard input if FILE is "-".
+ If PRINT_NAME is nonzero, print FILE next to the checksum and size.
+ Return 0 if successful, -1 if an error occurs. */
+
+int
+cksum (file, print_name)
+ char *file;
+ int print_name;
+{
+ unsigned char buf[BUFLEN]; /* Read buffer; an automatic array of BUFLEN bytes. */
+ unsigned long crc = 0;
+ long length = 0; /* Total number of bytes read so far. */
+ long bytes_read;
+ register FILE *fp;
+
+ if (!strcmp (file, "-"))
+ {
+ fp = stdin;
+ have_read_stdin = 1;
+ }
+ else
+ {
+ fp = fopen (file, "r");
+ if (fp == NULL)
+ {
+ error (0, errno, "%s", file);
+ return -1;
+ }
+ }
+
+ while ((bytes_read = fread (buf, 1, BUFLEN, fp)) > 0)
+ {
+ unsigned char *cp = buf;
+
+ length += bytes_read;
+ while (bytes_read--) /* Feed each byte through the table, high-order CRC byte first. */
+ crc = (crc << 8) ^ crctab[((crc >> 24) ^ *(cp++)) & 0xFF];
+ }
+
+ if (ferror (fp))
+ {
+ error (0, errno, "%s", file);
+ if (strcmp (file, "-")) /* stdin is left open here; main closes it. */
+ fclose (fp);
+ return -1;
+ }
+
+ if (strcmp (file, "-") && fclose (fp) == EOF)
+ {
+ error (0, errno, "%s", file);
+ return -1;
+ }
+
+ bytes_read = length; /* Reuse bytes_read to feed the length into the CRC, low-order byte first. */
+ while (bytes_read > 0)
+ {
+ crc = (crc << 8) ^ crctab[((crc >> 24) ^ bytes_read) & 0xFF];
+ bytes_read >>= 8;
+ }
+
+ crc = ~crc & 0xFFFFFFFF; /* Complement the result and keep only the low 32 bits. */
+
+ printf ("%10lu %8ld", crc, length);
+ if (print_name)
+ printf (" %s", file);
+ putchar ('\n');
+
+ return 0;
+}
+
+void
+main (argc, argv) /* Checksum each operand, or standard input if there are none. */
+ int argc;
+ char **argv;
+{
+ int errors = 0; /* Becomes 1 if any file fails; used as the exit status. */
+
+ program_name = argv[0];
+ have_read_stdin = 0;
+
+ if (argc == 1)
+ {
+ if (cksum ("-", 0) < 0) /* No operands: read stdin and print no file name. */
+ errors = 1;
+ }
+ else
+ {
+ int optind; /* Plain argv index; this program parses no options. */
+
+ for (optind = 1; optind < argc; ++optind)
+ if (cksum (argv[optind], 1) < 0)
+ errors = 1;
+ }
+
+ if (have_read_stdin && fclose (stdin) == EOF)
+ error (1, errno, "-");
+ exit (errors);
+}
+
+#endif /* !CRCTAB */
diff --git a/src/comm.c b/src/comm.c
new file mode 100644
index 000000000..4362b6475
--- /dev/null
+++ b/src/comm.c
@@ -0,0 +1,221 @@
+/* comm -- compare two sorted files line by line.
+ Copyright (C) 1986, 1990, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Richard Stallman and David MacKenzie. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+#include "linebuffer.h"
+
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+/* If nonzero, print lines that are found only in file 1. */
+int only_file_1;
+
+/* If nonzero, print lines that are found only in file 2. */
+int only_file_2;
+
+/* If nonzero, print lines that are found in both files. */
+int both;
+
+/* The name this program was run with. */
+char *program_name;
+
+int compare_files ();
+void error ();
+void writeline ();
+void usage ();
+
+void
+main (argc, argv) /* Parse -1, -2 and -3, then compare the two file operands. */
+ int argc;
+ char *argv[];
+{
+ int c;
+
+ program_name = argv[0];
+
+ only_file_1 = 1; /* All three columns are printed unless an option clears one. */
+ only_file_2 = 1;
+ both = 1;
+
+ while ((c = getopt (argc, argv, "123")) != EOF)
+ switch (c)
+ {
+ case '1':
+ only_file_1 = 0;
+ break;
+
+ case '2':
+ only_file_2 = 0;
+ break;
+
+ case '3':
+ both = 0;
+ break;
+
+ default:
+ usage ();
+ }
+
+ if (optind + 2 != argc) /* Exactly two file operands are required. */
+ usage ();
+
+ exit (compare_files (argv + optind));
+}
+
+/* Compare INFILES[0] and INFILES[1].
+ If either is "-", use the standard input for that file.
+ Assume that each input file is sorted;
+ merge them and output the result.
+ Return 0 if successful, 1 if any errors occur. */
+
+int
+compare_files (infiles)
+ char **infiles;
+{
+ /* For each file, we have one linebuffer in lb1. */
+ struct linebuffer lb1[2];
+
+ /* thisline[i] points to the linebuffer holding the next available line
+ in file i, or is NULL if there are no lines left in that file. */
+ struct linebuffer *thisline[2];
+
+ /* streams[i] holds the input stream for file i. */
+ FILE *streams[2];
+
+ int i, ret = 0;
+
+ /* Initialize the storage. */
+ for (i = 0; i < 2; i++)
+ {
+ initbuffer (&lb1[i]);
+ thisline[i] = &lb1[i];
+ streams[i] = strcmp (infiles[i], "-")
+ ? fopen (infiles[i], "r") : stdin;
+ if (!streams[i])
+ {
+ error (0, errno, "%s", infiles[i]);
+ return 1; /* Returns without closing a stream opened earlier in this loop. */
+ }
+
+ thisline[i] = readline (thisline[i], streams[i]);
+ }
+
+ while (thisline[0] || thisline[1])
+ {
+ int order; /* Negative if file 1's line sorts first, positive if file 2's, 0 if equal. */
+
+ /* Compare the next available lines of the two files. */
+
+ if (!thisline[0])
+ order = 1;
+ else if (!thisline[1])
+ order = -1;
+ else
+ {
+ /* Cannot use bcmp -- it only returns a boolean value. */
+ order = memcmp (thisline[0]->buffer, thisline[1]->buffer,
+ min (thisline[0]->length, thisline[1]->length));
+ if (order == 0) /* Common prefix is equal, so the shorter line sorts first. */
+ order = thisline[0]->length - thisline[1]->length;
+ }
+
+ /* Output the line that is lesser. */
+ if (order == 0)
+ writeline (thisline[1], stdout, 3);
+ else if (order > 0)
+ writeline (thisline[1], stdout, 2);
+ else
+ writeline (thisline[0], stdout, 1);
+
+ /* Step the file the line came from.
+ If the files match, step both files. */
+ if (order >= 0)
+ thisline[1] = readline (thisline[1], streams[1]);
+ if (order <= 0)
+ thisline[0] = readline (thisline[0], streams[0]);
+ }
+
+ /* Free all storage and close all input streams. */
+ for (i = 0; i < 2; i++)
+ {
+ free (lb1[i].buffer);
+ if (ferror (streams[i]) || fclose (streams[i]) == EOF) /* fclose is skipped when ferror is true. */
+ {
+ error (0, errno, "%s", infiles[i]);
+ ret = 1;
+ }
+ }
+ if (ferror (stdout) || fclose (stdout) == EOF)
+ {
+ error (0, errno, "write error");
+ ret = 1;
+ }
+ return ret;
+}
+
+/* Output the line in linebuffer LINE to stream STREAM
+ provided the switches say it should be output.
+ CLASS is 1 for a line found only in file 1,
+ 2 for a line only in file 2, 3 for a line in both. */
+
+void
+writeline (line, stream, class)
+ struct linebuffer *line;
+ FILE *stream;
+ int class;
+{
+ switch (class) /* Any other CLASS value falls through and prints the line unindented. */
+ {
+ case 1:
+ if (!only_file_1)
+ return;
+ break;
+
+ case 2:
+ if (!only_file_2)
+ return;
+ /* Emit a tab to skip column 1, if column 1 is being printed. */
+ if (only_file_1)
+ putc ('\t', stream);
+ break;
+
+ case 3:
+ if (!both)
+ return;
+ /* Emit a tab to skip column 1, if column 1 is being printed. */
+ if (only_file_1)
+ putc ('\t', stream);
+ /* Emit a tab to skip column 2, if column 2 is being printed. */
+ if (only_file_2)
+ putc ('\t', stream);
+ break;
+ }
+
+ fwrite (line->buffer, sizeof (char), line->length, stream);
+ putc ('\n', stream); /* The newline is added here, after LENGTH bytes of the buffer. */
+}
+
+void
+usage () /* Print a usage message on stderr and exit with status 1. */
+{
+ fprintf (stderr, "Usage: %s [-123] file1 file2\n", program_name);
+ exit (1);
+}
diff --git a/src/csplit.c b/src/csplit.c
new file mode 100644
index 000000000..56bffa385
--- /dev/null
+++ b/src/csplit.c
@@ -0,0 +1,1308 @@
+/* csplit - split a file into sections determined by context lines
+ Copyright (C) 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Stuart Kemp, cpsrk@groper.jcu.edu.au.
+ Modified by David MacKenzie, djm@gnu.ai.mit.edu. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <signal.h>
+#include "regex.h"
+#include "system.h"
+
+#if !defined(USG) && !defined(STDC_HEADERS)
+char *memchr ();
+#endif
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+void error ();
+
+void cleanup ();
+void close_output_file ();
+void create_output_file ();
+void save_line_to_file ();
+void usage ();
+
+#ifndef TRUE
+#define FALSE 0
+#define TRUE 1
+#endif
+
+/* Increment size of area for control records. */
+#define ALLOC_SIZE 20
+
+/* The default prefix for output file names. */
+#define DEFAULT_PREFIX "xx"
+
+typedef int boolean;
+
+/* A compiled pattern arg.
+   new_control_record creates one of these for each pattern argument.
+   REGEXPR is NULL for a line-number argument; split_file tests it to
+   choose between process_regexp and process_line_count. */
+struct control
+{
+ char *regexpr; /* Non-compiled regular expression, or NULL. */
+ struct re_pattern_buffer re_compiled; /* Compiled regular expression. */
+ int offset; /* Offset from regexp to split at. */
+ int lines_required; /* Number of lines required. */
+ int repeat; /* Repeat count; the pattern is applied REPEAT + 1 times. */
+ int argnum; /* ARGV index. */
+ boolean ignore; /* If true, produce no output (for regexp). */
+};
+
+/* Buffer tuning parameters.
+   Each macro is defined exactly once: compiling with DEBUG defined
+   selects some small values to test the algorithms, instead of
+   redefining the normal values after the fact. */
+#ifdef DEBUG
+
+/* Initial size of data area in buffers. */
+#define START_SIZE 200
+
+/* Increment size for data area. */
+#define INCR_SIZE 10
+
+/* Number of lines kept in each node in line list. */
+#define CTRL_SIZE 1
+
+#else /* !DEBUG */
+
+/* Initial size of data area in buffers. */
+#define START_SIZE 8191
+
+/* Increment size for data area. */
+#define INCR_SIZE 2048
+
+/* Number of lines kept in each node in line list. */
+#define CTRL_SIZE 80
+
+#endif /* !DEBUG */
+
+/* A string with a length count.
+   As filled in by keep_new_line, STR points into a buffer's data area
+   and LEN counts the line's trailing newline when it has one; the
+   chars are not followed by a terminating NUL. */
+struct cstring
+{
+ int len;
+ char *str;
+};
+
+/* Pointers to the beginnings of lines in the buffer area.
+   These structures are linked together if needed: each one describes
+   up to CTRL_SIZE lines, and keep_new_line chains another one on when
+   the current one is full. */
+struct line
+{
+ unsigned used; /* Number of offsets used in this struct. */
+ unsigned insert_index; /* Next offset to use when inserting line. */
+ unsigned retrieve_index; /* Next index to use when retrieving line. */
+ struct cstring starts[CTRL_SIZE]; /* Lines in the data area. */
+ struct line *next; /* Next in linked list. */
+};
+
+/* The structure to hold the input lines.
+   Contains a pointer to the data area and a list containing
+   pointers to the individual lines.
+   A record is either on the list starting at `head' (it still holds
+   lines to be processed) or on `free_list' (it can be reused). */
+struct buffer_record
+{
+ unsigned bytes_alloc; /* Size of the buffer area. */
+ unsigned bytes_used; /* Bytes used in the buffer area. */
+ unsigned start_line; /* First line number in this buffer. */
+ unsigned first_available; /* First line that can be retrieved. */
+ unsigned num_lines; /* Number of complete lines in this buffer. */
+ char *buffer; /* Data area. */
+ struct line *line_start; /* Head of list of pointers to lines. */
+ struct line *curr_line; /* The line start record currently in use. */
+ struct buffer_record *next; /* Next record on whichever list this is on. */
+};
+
+/* Input file descriptor; stays 0 (standard input) unless
+   set_input_file opens a named file. */
+int input_desc = 0;
+
+/* List of available buffers. */
+struct buffer_record *free_list = NULL;
+
+/* Start of buffer list. */
+struct buffer_record *head = NULL;
+
+/* Partially read line.  Points into the buffer the chars were read
+   into; see save_to_hold_area. */
+char *hold_area = NULL;
+
+/* Number of chars in `hold_area'. */
+unsigned hold_count = 0;
+
+/* Number of the last line in the buffers. */
+unsigned last_line_number = 0;
+
+/* Number of the line currently being examined. */
+unsigned current_line = 0;
+
+/* Number of the last line in the input file.
+   record_line_starts sets this only when the input ends with a line
+   that has no trailing newline. */
+unsigned last_line_in_file = 0;
+
+/* If TRUE, we have read EOF. */
+boolean have_read_eof = FALSE;
+
+/* Space in which make_filename builds the name of each output file;
+   allocated in main. */
+char *filename_space = NULL;
+
+/* Prefix part of output file names. */
+char *prefix = NULL;
+
+/* Number of digits to use in output file names. */
+int digits = 2;
+
+/* Number of files created so far. */
+unsigned files_created = 0;
+
+/* Number of bytes written to current file; reset by create_output_file. */
+unsigned bytes_written;
+
+/* Output file pointer; NULL when no output file is open. */
+FILE *output_stream = NULL;
+
+/* Perhaps it would be cleaner to pass arg values instead of indexes. */
+char **global_argv;
+
+/* If TRUE, do not print the count of bytes in each output file. */
+boolean suppress_count;
+
+/* If TRUE, remove output files on error. */
+boolean remove_files;
+
+/* The compiled pattern arguments, which determine how to split
+ the input file. */
+struct control *controls;
+
+/* Number of elements in `controls'. */
+unsigned control_used;
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Allocate N bytes of memory dynamically, with error checking. */
+
+char *
+xmalloc (n)
+ unsigned n;
+{
+ char *p;
+
+ p = malloc (n);
+ if (p == NULL)
+ {
+ error (0, 0, "virtual memory exhausted");
+ cleanup ();
+ }
+ return p;
+}
+
+/* Change the size of an allocated block of memory P to N bytes,
+ with error checking.
+ If P is NULL, run xmalloc.
+ If N is 0, run free and return NULL. */
+
+char *
+xrealloc (p, n)
+ char *p;
+ unsigned n;
+{
+ if (p == NULL)
+ return xmalloc (n);
+ if (n == 0)
+ {
+ free (p);
+ return 0;
+ }
+ p = realloc (p, n);
+ if (p == NULL)
+ {
+ error (0, 0, "virtual memory exhausted");
+ cleanup ();
+ }
+ return p;
+}
+
+/* Remember that the NUM chars starting at START are the beginning of
+   a line whose end has not been read yet.
+   Nothing is copied: only the address and the count are recorded, so
+   the chars stay in the buffer they were read into, even though that
+   buffer *is* put on the free list.  load_buffer moves them to the
+   start of the next buffer it fills, even if that turns out to be
+   the same buffer. */
+
+void
+save_to_hold_area (start, num)
+     char *start;
+     unsigned num;
+{
+  hold_count = num;
+  hold_area = start;
+}
+
+/* Read at most MAX chars from the input descriptor into DEST and
+   return how many were read.
+   Asking for 0 chars returns 0 without touching the input.
+   A read that returns 0 sets `have_read_eof'; a failed read is
+   reported and the program exits through cleanup. */
+
+int
+read_input (dest, max)
+     char *dest;
+     unsigned max;
+{
+  int count;
+
+  if (max == 0)
+    return 0;
+
+  count = read (input_desc, dest, max);
+
+  if (count < 0)
+    {
+      error (0, errno, "read error");
+      cleanup ();
+    }
+  else if (count == 0)
+    have_read_eof = TRUE;
+
+  return count;
+}
+
+/* Reset the existing line record P so that it describes no lines.
+   The link to the next record is left alone. */
+
+void
+clear_line_control (p)
+     struct line *p;
+{
+  p->retrieve_index = p->insert_index = p->used = 0;
+}
+
+/* Reset every line record chained from B. */
+
+void
+clear_all_line_control (b)
+     struct buffer_record *b;
+{
+  struct line *rec = b->line_start;
+
+  while (rec != NULL)
+    {
+      clear_line_control (rec);
+      rec = rec->next;
+    }
+}
+
+/* Allocate a line record that describes no lines and is not linked
+   to any other record, and return it. */
+
+struct line *
+new_line_control ()
+{
+  struct line *rec = (struct line *) xmalloc (sizeof (struct line));
+
+  clear_line_control (rec);
+  rec->next = NULL;
+
+  return rec;
+}
+
+/* Record LINE_START, which is the address of the start of a line
+   of length LINE_LEN in the large buffer, in the lines buffer of B.
+
+   Line records are chained together as they fill up.  A buffer that
+   comes back from the free list keeps its chain (get_new_buffer only
+   clears the records), so when the current record is full the next
+   record in the chain is reused if there is one; a new record is
+   allocated only at the end of the chain. */
+
+void
+keep_new_line (b, line_start, line_len)
+     struct buffer_record *b;
+     char *line_start;
+     int line_len;
+{
+  struct line *l;
+
+  /* If there is no existing area to keep line info, get some. */
+  if (b->line_start == NULL)
+    b->line_start = b->curr_line = new_line_control ();
+
+  /* If existing area for lines is full, go on to the next record.
+     Overwriting an existing `next' link with a fresh record would
+     leak the rest of the chain each time the buffer is recycled. */
+  if (b->curr_line->used == CTRL_SIZE)
+    {
+      if (b->curr_line->next == NULL)
+        b->curr_line->next = new_line_control ();
+      else
+        clear_line_control (b->curr_line->next);
+      b->curr_line = b->curr_line->next;
+    }
+
+  l = b->curr_line;
+
+  /* Record the start of the line, and update counters. */
+  l->starts[l->insert_index].str = line_start;
+  l->starts[l->insert_index].len = line_len;
+  l->used++;
+  l->insert_index++;
+}
+
+/* Scan the buffer in B for newline characters
+   and record the line start locations and lengths in B.
+   Return the number of lines found in this buffer.
+
+   There may be an incomplete line at the end of the buffer;
+   a pointer is kept to this area, which will be used when
+   the next buffer is filled.  Once end-of-file has been read, such a
+   trailing piece is recorded as a line of its own instead.
+
+   Also sets B's line numbers and advances `last_line_number'. */
+
+unsigned
+record_line_starts (b)
+ struct buffer_record *b;
+{
+ char *line_start; /* Start of current line. */
+ char *line_end; /* End of each line found. */
+ unsigned bytes_left; /* Length of incomplete last line. */
+ unsigned lines; /* Number of lines found. */
+ unsigned line_length; /* Length of each line found. */
+
+ if (b->bytes_used == 0)
+ return 0;
+
+ lines = 0;
+ line_start = b->buffer;
+ bytes_left = b->bytes_used;
+
+ for (;;)
+ {
+ line_end = memchr (line_start, '\n', bytes_left);
+ if (line_end == NULL)
+ break;
+ /* The recorded length includes the newline itself. */
+ line_length = line_end - line_start + 1;
+ keep_new_line (b, line_start, line_length);
+ bytes_left -= line_length;
+ line_start = line_end + 1;
+ lines++;
+ }
+
+ /* Check for an incomplete last line. */
+ if (bytes_left)
+ {
+ if (have_read_eof)
+ {
+ /* Nothing more will arrive to complete it, so it counts as
+ the last line of the file. */
+ keep_new_line (b, line_start, bytes_left);
+ lines++;
+ last_line_in_file = last_line_number + lines;
+ }
+ else
+ save_to_hold_area (line_start, bytes_left);
+ }
+
+ b->num_lines = lines;
+ b->first_available = b->start_line = last_line_number + 1;
+ last_line_number += lines;
+
+ return lines;
+}
+
+/* Allocate a buffer record whose data area has room for SIZE bytes,
+   plus an extra byte for safety, and return it.
+   The record starts with no line records; its byte and line counters
+   are left for the caller to set. */
+
+struct buffer_record *
+create_new_buffer (size)
+     unsigned size;
+{
+  struct buffer_record *buf;
+
+  buf = (struct buffer_record *) xmalloc (sizeof (struct buffer_record));
+
+  buf->bytes_alloc = size;
+  buf->buffer = (char *) xmalloc (size + 1);
+  buf->curr_line = NULL;
+  buf->line_start = NULL;
+
+  return buf;
+}
+
+/* Return a new buffer of at least MIN_SIZE bytes. If a buffer of at
+   least that size is currently free, use it, otherwise create a new one.
+   The buffer returned is empty, is on no list, and has its line
+   numbers set to follow `last_line_number'. */
+
+struct buffer_record *
+get_new_buffer (min_size)
+ unsigned min_size;
+{
+ struct buffer_record *p, *q;
+ struct buffer_record *new_buffer; /* Buffer to return. */
+ unsigned alloc_size; /* Actual size that will be requested. */
+
+ /* Sizes of newly created buffers are START_SIZE plus a whole number
+ of INCR_SIZE steps. */
+ alloc_size = START_SIZE;
+ while (min_size > alloc_size)
+ alloc_size += INCR_SIZE;
+
+ if (free_list == NULL)
+ new_buffer = create_new_buffer (alloc_size);
+ else
+ {
+ /* Use first-fit to find a buffer. */
+ p = new_buffer = NULL;
+ q = free_list;
+
+ /* P trails Q by one record so that Q can be unlinked. */
+ do
+ {
+ if (q->bytes_alloc >= min_size)
+ {
+ if (p == NULL)
+ free_list = q->next;
+ else
+ p->next = q->next;
+ break;
+ }
+ p = q;
+ q = q->next;
+ }
+ while (q);
+
+ /* Q is NULL here if no free buffer was large enough. */
+ new_buffer = (q ? q : create_new_buffer (alloc_size));
+
+ /* A recycled buffer keeps its chain of line records; empty them. */
+ new_buffer->curr_line = new_buffer->line_start;
+ clear_all_line_control (new_buffer);
+ }
+
+ new_buffer->num_lines = 0;
+ new_buffer->bytes_used = 0;
+ new_buffer->start_line = new_buffer->first_available = last_line_number + 1;
+ new_buffer->next = NULL;
+
+ return new_buffer;
+}
+
+/* Add buffer BUF to the list of free buffers.
+   BUF goes on the front of `free_list'; nothing is deallocated, so
+   its data area and line records stay as they are until
+   get_new_buffer hands the buffer out again. */
+
+void
+free_buffer (buf)
+ struct buffer_record *buf;
+{
+ buf->next = free_list;
+ free_list = buf;
+}
+
+/* Put buffer BUF at the end of the list, starting at `head', of
+   buffers that contain some data yet to be processed.
+   Retrieval from BUF is set to begin at its first line record. */
+
+void
+save_buffer (buf)
+     struct buffer_record *buf;
+{
+  struct buffer_record **link;
+
+  buf->curr_line = buf->line_start;
+  buf->next = NULL;
+
+  /* Find the link that ends the list: `head' itself if the list is
+     empty, otherwise the `next' member of the last buffer. */
+  link = &head;
+  while (*link != NULL)
+    link = &(*link)->next;
+
+  *link = buf;
+}
+
+/* Fill a buffer of input.
+
+   Set the initial size of the buffer to a default.
+   Fill the buffer (from the hold area and input stream)
+   and find the individual lines.
+   If no lines are found (the buffer is too small to hold the next line),
+   release the current buffer (whose contents would have been put in the
+   hold area) and repeat the process with another large buffer until at least
+   one entire line has been read.
+
+   Return TRUE if a new buffer was obtained, otherwise false
+   (in which case end-of-file must have been encountered). */
+
+boolean
+load_buffer ()
+{
+ struct buffer_record *b;
+ unsigned bytes_wanted = START_SIZE; /* Minimum buffer size. */
+ unsigned bytes_avail; /* Size of new buffer created. */
+ unsigned lines_found; /* Number of lines in this new buffer. */
+ char *p; /* Place to load into buffer. */
+
+ if (have_read_eof)
+ return FALSE;
+
+ /* We must make the buffer at least as large as the amount of data
+ in the partial line left over from the last call. */
+ if (bytes_wanted < hold_count)
+ bytes_wanted = hold_count;
+
+ do
+ {
+ b = get_new_buffer (bytes_wanted);
+ bytes_avail = b->bytes_alloc; /* Size of buffer returned. */
+ p = b->buffer;
+
+ /* First check the `holding' area for a partial line. */
+ if (hold_count)
+ {
+ /* The held chars may already sit at the start of this very
+ buffer, in which case there is nothing to move. */
+ if (p != hold_area)
+ bcopy (hold_area, p, hold_count);
+ p += hold_count;
+ b->bytes_used += hold_count;
+ bytes_avail -= hold_count;
+ hold_count = 0;
+ }
+
+ b->bytes_used += (unsigned) read_input (p, bytes_avail);
+
+ lines_found = record_line_starts (b);
+ /* If another pass is needed, ask for a buffer larger than this one. */
+ bytes_wanted = b->bytes_alloc + INCR_SIZE;
+ if (!lines_found)
+ free_buffer (b);
+ }
+ while (!lines_found && !have_read_eof);
+
+ if (lines_found)
+ save_buffer (b);
+
+ return lines_found != 0;
+}
+
+/* Return the number of the first line that has not been retrieved
+   yet, loading a buffer of input first if none is waiting.
+   Having no input left at that point is reported through error. */
+
+unsigned
+get_first_line_in_buffer ()
+{
+  if (head == NULL)
+    {
+      if (!load_buffer ())
+        error (1, errno, "input disappeared");
+    }
+
+  return head->first_available;
+}
+
+/* Return a pointer to the logical first line in the buffer and make the
+   next line the logical first line.
+   Return NULL if there is no more input.
+
+   When the last line of a buffer is taken, the buffer goes to the
+   free list; the line returned still points into that buffer. */
+
+struct cstring *
+remove_line ()
+{
+ struct cstring *line; /* Return value. */
+ unsigned line_got; /* Number of the line retrieved. */
+ struct line *l; /* For convenience. */
+
+ if (head == NULL && !load_buffer ())
+ return NULL;
+
+ if (current_line < head->first_available)
+ current_line = head->first_available;
+
+ /* LINE_GOT is not used after this; the statement matters for the
+ increment of `first_available'. */
+ line_got = head->first_available++;
+
+ l = head->curr_line;
+
+ line = &l->starts[l->retrieve_index];
+
+ /* Advance index to next line. */
+ if (++l->retrieve_index == l->used)
+ {
+ /* Go on to the next line record. */
+ head->curr_line = l->next;
+ /* A following record with no lines in it is a leftover from an
+ earlier use of this buffer, so it ends the data too. */
+ if (head->curr_line == NULL || head->curr_line->used == 0)
+ {
+ /* Go on to the next data block. */
+ struct buffer_record *b = head;
+ head = head->next;
+ free_buffer (b);
+ }
+ }
+
+ return line;
+}
+
+/* Search the buffers for line LINENUM, reading more input if necessary.
+   Return a pointer to the line, or NULL if it is not found in the file.
+   NULL is also returned for a line that precedes the first buffer
+   still held.  The line is not removed from the buffers. */
+
+struct cstring *
+find_line (linenum)
+ unsigned linenum;
+{
+ struct buffer_record *b;
+
+ if (head == NULL && !load_buffer ())
+ return NULL;
+
+ if (linenum < head->start_line)
+ return NULL;
+
+ for (b = head;;)
+ {
+ if (linenum < b->start_line + b->num_lines)
+ {
+ /* The line is in this buffer. */
+ struct line *l;
+ unsigned offset; /* How far into the buffer the line is. */
+
+ l = b->line_start;
+ offset = linenum - b->start_line;
+ /* Find the control record. */
+ /* Every record before the one wanted holds CTRL_SIZE lines. */
+ while (offset >= CTRL_SIZE)
+ {
+ l = l->next;
+ offset -= CTRL_SIZE;
+ }
+ return &l->starts[offset];
+ }
+ /* load_buffer appends to the list, so on success B gets a successor. */
+ if (b->next == NULL && !load_buffer ())
+ return NULL;
+ b = b->next; /* Try the next data block. */
+ }
+}
+
+/* Return TRUE if there are no more lines available for input, that is,
+   if the line following `current_line' cannot be found; return FALSE
+   if at least one more line is available.
+   find_line may read more input to decide. */
+
+boolean
+no_more_lines ()
+{
+ return (find_line (current_line + 1) == NULL) ? TRUE : FALSE;
+}
+
+/* Select the input file.  A NAME of "-" selects descriptor 0;
+   any other NAME is opened for reading, and a failure to open it
+   is reported through error. */
+
+void
+set_input_file (name)
+     char *name;
+{
+  if (strcmp (name, "-") == 0)
+    {
+      input_desc = 0;
+      return;
+    }
+
+  input_desc = open (name, O_RDONLY);
+  if (input_desc < 0)
+    error (1, errno, "%s", name);
+}
+
+/* Take lines off the front of the buffers, from the first one not
+   yet retrieved up to, but not including, line LAST_LINE, and save
+   them to the current output file.
+   If IGNORE is TRUE the lines are taken but not saved.
+   ARGNUM is the index in ARGV of the current pattern; it is used to
+   name the pattern if LAST_LINE turns out to be out of range. */
+
+void
+write_to_file (last_line, ignore, argnum)
+     unsigned last_line;
+     boolean ignore;
+     int argnum;
+{
+  struct cstring *line;
+  unsigned first_line;          /* First available input line. */
+  unsigned remaining;           /* Lines still to be taken. */
+
+  first_line = get_first_line_in_buffer ();
+  if (first_line > last_line)
+    {
+      error (0, 0, "%s: line number out of range", global_argv[argnum]);
+      cleanup ();
+    }
+
+  for (remaining = last_line - first_line; remaining > 0; remaining--)
+    {
+      line = remove_line ();
+      if (line == NULL)
+        {
+          error (0, 0, "%s: line number out of range", global_argv[argnum]);
+          cleanup ();
+        }
+      if (!ignore)
+        save_line_to_file (line);
+    }
+}
+
+/* Save every line that remove_line still yields to the current
+   output file.  Used once all the patterns have been processed. */
+
+void
+dump_rest_of_file ()
+{
+  struct cstring *line;
+
+  for (;;)
+    {
+      line = remove_line ();
+      if (line == NULL)
+        break;
+      save_line_to_file (line);
+    }
+}
+
+/* Report that the line number in control record P is out of range,
+   mentioning REPETITION when it is nonzero, then exit through
+   cleanup. */
+
+void
+handle_line_error (p, repetition)
+     struct control *p;
+     int repetition;
+{
+  fprintf (stderr, "%s: `%d': line number out of range",
+           program_name, p->lines_required);
+
+  if (repetition == 0)
+    fprintf (stderr, "\n");
+  else
+    fprintf (stderr, " on repetition %d\n", repetition);
+
+  cleanup ();
+}
+
+/* Create the next output file and save to it the input lines up to,
+   but not including, line number P->lines_required * (REPETITION + 1).
+   P is the control record.
+   REPETITION is the repetition number.
+   It is an error for that line number to have been passed already,
+   for the input to run out before it is reached, or for no line to
+   remain after the file has been written. */
+
+void
+process_line_count (p, repetition)
+     struct control *p;
+     int repetition;
+{
+  struct cstring *line;
+  unsigned stop_line = p->lines_required * (repetition + 1);
+  unsigned next_line;
+
+  create_output_file ();
+
+  /* Check for requesting a line that has already been written out. */
+  next_line = get_first_line_in_buffer ();
+  if (next_line >= stop_line)
+    handle_line_error (p, repetition);
+
+  for (; next_line < stop_line; next_line++)
+    {
+      line = remove_line ();
+      if (line == NULL)
+        handle_line_error (p, repetition);
+      save_line_to_file (line);
+    }
+
+  close_output_file ();
+
+  /* The line number given must not be 1 greater than the number of
+     lines in the file. */
+  if (no_more_lines ())
+    handle_line_error (p, repetition);
+}
+
+/* Report that the regexp in control record P was not matched,
+   mentioning REPETITION when it is nonzero.
+   Unless IGNORE is TRUE, first save what is left of the input to the
+   current output file and close it.  Then exit through cleanup. */
+
+void
+regexp_error (p, repetition, ignore)
+     struct control *p;
+     int repetition;
+     boolean ignore;
+{
+  fprintf (stderr, "%s: `%s': match not found",
+           program_name, global_argv[p->argnum]);
+
+  if (repetition == 0)
+    fprintf (stderr, "\n");
+  else
+    fprintf (stderr, " on repetition %d\n", repetition);
+
+  if (ignore == FALSE)
+    {
+      dump_rest_of_file ();
+      close_output_file ();
+    }
+
+  cleanup ();
+}
+
+/* Read the input until a line matches the regexp in P, outputting
+   it unless P->IGNORE is TRUE.
+   REPETITION is this repeat-count; 0 means the first time.
+
+   The section ends just before the matching line, moved by P->offset
+   lines.  If no line matches, regexp_error reports it. */
+
+void
+process_regexp (p, repetition)
+ struct control *p;
+ int repetition;
+{
+ struct cstring *line; /* From input file. */
+ register unsigned line_len; /* To make "$" in regexps work. */
+ unsigned break_line; /* First line number of next file. */
+ boolean ignore = p->ignore; /* If TRUE, skip this section. */
+ int ret;
+
+ if (!ignore)
+ create_output_file ();
+
+ /* If there is no offset for the regular expression, or
+ it is positive, then it is not necessary to buffer the lines. */
+
+ if (p->offset >= 0)
+ {
+ for (;;)
+ {
+ line = find_line (++current_line);
+ if (line == NULL)
+ regexp_error (p, repetition, ignore);
+ /* Leave the trailing newline out of the text that is searched. */
+ line_len = line->len;
+ if (line->str[line_len - 1] == '\n')
+ line_len--;
+ ret = re_search (&p->re_compiled, line->str, line_len,
+ 0, line_len, (struct re_registers *) 0);
+ if (ret == -2)
+ {
+ error (0, 0, "error in regular expression search");
+ cleanup ();
+ }
+ if (ret == -1)
+ {
+ /* No match: this line belongs to the current section. */
+ line = remove_line ();
+ if (!ignore)
+ save_line_to_file (line);
+ }
+ else
+ break;
+ }
+ }
+ else
+ {
+ /* Buffer the lines. */
+ /* With a negative offset the section ends before the matching
+ line, so nothing is removed until the match is known. */
+ for (;;)
+ {
+ line = find_line (++current_line);
+ if (line == NULL)
+ regexp_error (p, repetition, ignore);
+ line_len = line->len;
+ if (line->str[line_len - 1] == '\n')
+ line_len--;
+ ret = re_search (&p->re_compiled, line->str, line_len,
+ 0, line_len, (struct re_registers *) 0);
+ if (ret == -2)
+ {
+ error (0, 0, "error in regular expression search");
+ cleanup ();
+ }
+ if (ret >= 0)
+ break;
+ }
+ }
+
+ /* Account for any offset from this regexp. */
+ break_line = current_line + p->offset;
+
+ write_to_file (break_line, ignore, p->argnum);
+
+ if (!ignore)
+ close_output_file ();
+
+ current_line = break_line;
+}
+
+/* Work through the control records in order, applying each one
+   `repeat' + 1 times: a record with a regexp goes to process_regexp,
+   any other to process_line_count.
+   Whatever input is left afterwards goes into one last output file. */
+
+void
+split_file ()
+{
+  int i;                        /* Index into `controls'. */
+  int rep;                      /* Repetition of the current record. */
+
+  for (i = 0; i < control_used; i++)
+    {
+      struct control *c = &controls[i];
+
+      for (rep = 0; rep <= c->repeat; rep++)
+        {
+          if (c->regexpr)
+            process_regexp (c, rep);
+          else
+            process_line_count (c, rep);
+        }
+    }
+
+  create_output_file ();
+  dump_rest_of_file ();
+  close_output_file ();
+}
+
+/* Return the name of output file number NUM.
+   The name is `prefix' followed by NUM, zero-padded to at least
+   `digits' characters.  It is built in `filename_space', so each call
+   overwrites the name returned by the one before.
+   NOTE(review): nothing here checks the size of `filename_space';
+   "%0*d" pads but never truncates, so the caller that allocates the
+   space must allow for a NUM that needs more than `digits' chars. */
+
+char *
+make_filename (num)
+ int num;
+{
+ sprintf (filename_space, "%s%0*d", prefix, digits, num);
+ return filename_space;
+}
+
+/* Open the next output file for writing, make it the current output
+   stream, and start its byte count at zero.
+   The file is named after the number of files created so far.
+   If it cannot be opened, report it and exit through cleanup. */
+
+void
+create_output_file ()
+{
+  char *name = make_filename (files_created);
+
+  output_stream = fopen (name, "w");
+  if (!output_stream)
+    {
+      error (0, errno, "%s", name);
+      cleanup ();
+    }
+
+  bytes_written = 0;
+  files_created++;
+}
+
+/* Unlink each output file created so far, numbers 0 up to
+   `files_created' - 1.  A file that cannot be unlinked is reported
+   and the rest are still attempted. */
+
+void
+delete_all_files ()
+{
+  int n;
+
+  for (n = 0; n < files_created; n++)
+    {
+      char *name = make_filename (n);
+
+      if (unlink (name) != 0)
+        error (0, errno, "%s", name);
+    }
+}
+
+/* Close the current output file and print the count
+   of characters in this file, unless counts are suppressed.
+   Does nothing if no output file is open.
+   If the file cannot be closed, report a write error and exit
+   through cleanup. */
+
+void
+close_output_file ()
+{
+  FILE *stream = output_stream;
+
+  if (stream == NULL)
+    return;
+
+  /* Forget the stream before closing it.  cleanup calls back into
+     this function whenever `output_stream' is set, so leaving it set
+     across a failed fclose would close the same stream a second
+     time. */
+  output_stream = NULL;
+
+  if (fclose (stream) == EOF)
+    {
+      error (0, errno, "write error");
+      cleanup ();
+    }
+
+  /* `bytes_written' is unsigned, so print it as such. */
+  if (!suppress_count)
+    fprintf (stdout, "%u\n", bytes_written);
+}
+
+/* Close the current output file, if one is open; delete the files
+   created so far if `remove_files' is set; then exit with status 1.
+   Called when an error is detected. */
+
+void
+cleanup ()
+{
+  /* close_output_file does nothing when no output file is open. */
+  close_output_file ();
+
+  if (remove_files)
+    delete_all_files ();
+
+  exit (1);
+}
+
+/* Write the chars of LINE to the current output file and add their
+   number to the count of bytes written to that file. */
+
+void
+save_line_to_file (line)
+     struct cstring *line;
+{
+  bytes_written += line->len;
+  fwrite (line->str, sizeof (char), line->len, output_stream);
+}
+
+/* Return the next unused element of `controls', enlarging the array
+   by ALLOC_SIZE elements when it is full.
+   The record is given no regexp, no repeat count, no required line
+   count and no offset; `argnum' and `ignore' are left for the caller
+   to set. */
+
+struct control *
+new_control_record ()
+{
+  static unsigned control_allocated = 0; /* Total space allocated. */
+  struct control *rec;
+
+  if (control_allocated == 0)
+    {
+      control_allocated = ALLOC_SIZE;
+      controls = (struct control *)
+        xmalloc (sizeof (struct control) * control_allocated);
+    }
+  else if (control_used == control_allocated)
+    {
+      control_allocated += ALLOC_SIZE;
+      controls = (struct control *)
+        xrealloc (controls, sizeof (struct control) * control_allocated);
+    }
+
+  rec = controls + control_used;
+  control_used++;
+
+  rec->offset = 0;
+  rec->lines_required = 0;
+  rec->repeat = 0;
+  rec->regexpr = NULL;
+
+  return rec;
+}
+
+/* Convert string NUM to an integer and put the value in *RESULT.
+   Return TRUE if the string is nonempty, consists entirely of
+   digits, and its value fits in an int; return FALSE if not, in which
+   case *RESULT is left unchanged. */
+
+boolean
+string_to_number (result, num)
+     int *result;
+     char *num;
+{
+  /* Largest value of an int, found without <limits.h>: it is assumed
+     to be the largest unsigned value with its top bit dropped. */
+  int limit = (int) (~(unsigned) 0 >> 1);
+  int val = 0;
+  int digit;
+
+  if (*num == '\0')
+    return FALSE;
+
+  for (; *num != '\0'; num++)
+    {
+      /* The <ctype.h> functions are only defined for values in the
+         range of unsigned char (and EOF), so a plain char that may be
+         negative must be converted first. */
+      if (!isdigit ((unsigned char) *num))
+        return FALSE;
+      digit = *num - '0';
+
+      /* Refuse a number for which VAL * 10 + DIGIT would overflow. */
+      if (val > (limit - digit) / 10)
+        return FALSE;
+      val = val * 10 + digit;
+    }
+
+  *result = val;
+  return TRUE;
+}
+
+/* Parse the numeric offset that follows a regular expression and
+   store it, with its sign, in P->offset.
+   STR is the entire command line argument.
+   ARGNUM is the index in ARGV of STR (not used here).
+   P is the control record for this regular expression.
+   NUM is the part of STR after the closing delimiter; it must be a
+   `+' or a `-' followed by digits, or the problem is reported through
+   error. */
+
+void
+check_for_offset (argnum, p, str, num)
+     int argnum;
+     struct control *p;
+     char *str;
+     char *num;
+{
+  if (*num != '-' && *num != '+')
+    error (1, 0, "%s: `+' or `-' expected after delimiter", str);
+
+  if (!string_to_number (&p->offset, num + 1))
+    error (1, 0, "%s: integer expected after `%c'", str, *num);
+
+  if (*num == '-')
+    p->offset = -p->offset;
+}
+
+/* STR is a command line arg whose first character is '{'.
+   Check that it ends with '}' and that the chars in between are
+   digits, and store their value in P->repeat.
+   ARGNUM is the ARGV index of STR.
+   STR is modified while it is parsed and put back as it was before
+   returning. */
+
+void
+parse_repeat_count (argnum, p, str)
+     int argnum;
+     struct control *p;
+     char *str;
+{
+  char *last = str + strlen (str) - 1;
+  boolean ok;
+
+  if (*last != '}')
+    error (1, 0, "%s: `}' is required in repeat count", str);
+
+  /* Cut the brace off so that the number ends where the string does. */
+  *last = '\0';
+  ok = string_to_number (&p->repeat, str + 1);
+
+  /* The brace is still cut off here, so the message supplies it. */
+  if (!ok)
+    error (1, 0, "%s}: integer required between `{' and `}'",
+           global_argv[argnum]);
+
+  *last = '}';
+}
+
+/* Extract the regular expression from STR and check for a numeric offset.
+   STR should start with the regexp delimiter character; the regexp
+   runs up to the last occurrence of that character in STR.
+   Return a new control record for the regular expression, holding
+   both a NUL-terminated copy of its text and its compiled form.
+   ARGNUM is the ARGV index of STR.
+   Unless IGNORE is TRUE, mark these lines for output. */
+
+struct control *
+extract_regexp (argnum, ignore, str)
+     int argnum;
+     boolean ignore;
+     char *str;
+{
+  int len;                      /* Number of chars in this regexp. */
+  char delim = *str;
+  char *closing_delim;
+  struct control *p;
+  char *err;
+
+  closing_delim = rindex (str + 1, delim);
+  if (closing_delim == NULL)
+    error (1, 0, "%s: closing delimiter `%c' missing", str, delim);
+
+  len = closing_delim - str - 1;
+  p = new_control_record ();
+  p->argnum = argnum;
+  p->ignore = ignore;
+
+  p->regexpr = (char *) xmalloc ((unsigned) (len + 1));
+  strncpy (p->regexpr, str + 1, len);
+  /* strncpy copies exactly LEN chars here and adds no terminator. */
+  p->regexpr[len] = '\0';
+
+  p->re_compiled.allocated = len * 2;
+  p->re_compiled.buffer = (unsigned char *) xmalloc (p->re_compiled.allocated);
+  p->re_compiled.fastmap = xmalloc (256);
+  p->re_compiled.translate = 0;
+  err = re_compile_pattern (p->regexpr, len, &p->re_compiled);
+  if (err)
+    {
+      error (0, 0, "%s: invalid regular expression: %s", str, err);
+      cleanup ();
+    }
+
+  /* Anything after the closing delimiter has to be an offset. */
+  if (closing_delim[1])
+    check_for_offset (argnum, p, str, closing_delim + 1);
+
+  return p;
+}
+
+/* Build a control record for each break pattern in args START
+   through ARGC - 1 of ARGV.
+   An arg that starts with '/' or '%' is a regexp (ignored output for
+   '%'); any other arg has to be a line number.
+   If the arg after a pattern starts with '{', it is taken as that
+   pattern's repeat count. */
+
+void
+parse_patterns (argc, start, argv)
+     int argc;
+     int start;
+     char **argv;
+{
+  int i = start;                /* Index into ARGV. */
+
+  while (i < argc)
+    {
+      char *arg = argv[i];
+      struct control *p;        /* New control record created. */
+
+      if (*arg == '/' || *arg == '%')
+        p = extract_regexp (i, *arg == '%', arg);
+      else
+        {
+          p = new_control_record ();
+          p->argnum = i;
+          if (!string_to_number (&p->lines_required, arg))
+            error (1, 0, "%s: invalid pattern", arg);
+        }
+      i++;
+
+      /* We may have a repeat count. */
+      if (i < argc && *argv[i] == '{')
+        {
+          parse_repeat_count (i, p, argv[i]);
+          i++;
+        }
+    }
+}
+
+/* Signal handler that main installs for SIGHUP, SIGINT, SIGQUIT and
+   SIGTERM: say that the program was interrupted, then exit through
+   cleanup. */
+
+void
+interrupt_handler ()
+{
+ error (0, 0, "interrupted");
+ cleanup ();
+}
+
+/* Long-named options, as passed to getopt_long in main.  Each one is
+   mapped to the letter that main's option switch handles; a 1 in the
+   second member marks the options that take an argument. */
+struct option longopts[] =
+{
+ {"digits", 1, NULL, 'n'},
+ {"quiet", 0, NULL, 's'},
+ {"silent", 0, NULL, 's'},
+ {"keep-files", 0, NULL, 'k'},
+ {"prefix", 1, NULL, 'f'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Set the defaults, install the signal handlers, parse the options,
+   open the input file named by the first non-option arg, turn the
+   remaining args into control records, and split the input.
+   Exit with status 0 if all of that succeeds; errors are reported
+   through usage, error, or cleanup. */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int optc;
+  unsigned number_width;        /* Chars reserved for the file number. */
+#ifdef _POSIX_VERSION
+  struct sigaction oldact, newact;
+#endif /* _POSIX_VERSION */
+
+  program_name = argv[0];
+  global_argv = argv;
+  controls = NULL;
+  control_used = 0;
+  suppress_count = FALSE;
+  remove_files = TRUE;
+  prefix = DEFAULT_PREFIX;
+
+  /* Catch each of these signals unless it is being ignored, so that
+     the output files can be cleaned up. */
+#ifdef _POSIX_VERSION
+  newact.sa_handler = interrupt_handler;
+  sigemptyset (&newact.sa_mask);
+  newact.sa_flags = 0;
+
+  sigaction (SIGHUP, NULL, &oldact);
+  if (oldact.sa_handler != SIG_IGN)
+    sigaction (SIGHUP, &newact, NULL);
+
+  sigaction (SIGINT, NULL, &oldact);
+  if (oldact.sa_handler != SIG_IGN)
+    sigaction (SIGINT, &newact, NULL);
+
+  sigaction (SIGQUIT, NULL, &oldact);
+  if (oldact.sa_handler != SIG_IGN)
+    sigaction (SIGQUIT, &newact, NULL);
+
+  sigaction (SIGTERM, NULL, &oldact);
+  if (oldact.sa_handler != SIG_IGN)
+    sigaction (SIGTERM, &newact, NULL);
+#else /* !_POSIX_VERSION */
+  if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
+    signal (SIGHUP, interrupt_handler);
+  if (signal (SIGINT, SIG_IGN) != SIG_IGN)
+    signal (SIGINT, interrupt_handler);
+  if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
+    signal (SIGQUIT, interrupt_handler);
+  if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
+    signal (SIGTERM, interrupt_handler);
+#endif
+
+  while ((optc = getopt_long (argc, argv, "f:kn:s", longopts, (int *) 0))
+         != EOF)
+    switch (optc)
+      {
+      case 'f':
+        prefix = optarg;
+        break;
+
+      case 'k':
+        remove_files = FALSE;
+        break;
+
+      case 'n':
+        if (!string_to_number (&digits, optarg))
+          error (1, 0, "%s: invalid number", optarg);
+        break;
+
+      case 's':
+        suppress_count = TRUE;
+        break;
+
+      default:
+        usage ();
+      }
+
+  /* An input file and at least one pattern are required. */
+  if (optind >= argc - 1)
+    usage ();
+
+  /* make_filename prints the file number with "%0*d", which pads to
+     `digits' chars but never truncates.  Reserve room for the longest
+     number an int can print as (3 chars per byte is enough with 8-bit
+     bytes, plus 1 for a sign), so that creating more files than
+     `digits' digits can count does not overrun the name. */
+  number_width = sizeof (int) * 3 + 1;
+  if (number_width < (unsigned) digits)
+    number_width = digits;
+
+  filename_space = (char *) xmalloc (strlen (prefix) + number_width + 1);
+
+  set_input_file (argv[optind++]);
+
+  parse_patterns (argc, optind, argv);
+
+  split_file ();
+
+  if (close (input_desc) < 0)
+    {
+      error (0, errno, "read error");
+      cleanup ();
+    }
+
+  exit (0);
+}
+
+/* Print a summary of the options and arguments, naming the program
+   as it was invoked, on standard error, then exit with status 1. */
+
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-sk] [-f prefix] [-n digits] [--prefix=prefix]\n\
+ [--digits=digits] [--quiet] [--silent] [--keep-files] file pattern...\n",
+ program_name);
+ exit (1);
+}
diff --git a/src/cut.c b/src/cut.c
new file mode 100644
index 000000000..93808b063
--- /dev/null
+++ b/src/cut.c
@@ -0,0 +1,586 @@
+/* cut - remove parts of lines of files
+ Copyright (C) 1984 by David M. Ihnat
+
+ This program is a total rewrite of the Bell Laboratories Unix(Tm)
+ command of the same name, as of System V. It contains no proprietary
+ code, and therefore may be used without violation of any proprietary
+ agreements whatsoever. However, you will notice that the program is
+ copyrighted by me. This is to assure the program does *not* fall
+ into the public domain. Thus, I may specify just what I am now:
+ This program may be freely copied and distributed, provided this notice
+ remains; it may not be sold for profit without express written consent of
+ the author.
+ Please note that I recreated the behavior of the Unix(Tm) 'cut' command
+ as faithfully as possible; however, I haven't run a full set of regression
+ tests. Thus, the user of this program accepts full responsibility for any
+ effects or loss; in particular, the author is not responsible for any losses,
+ explicit or incidental, that may be incurred through use of this program.
+
+ I ask that any bugs (and, if possible, fixes) be reported to me when
+ possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us
+
+ POSIX changes, bug fixes, long-named options, and cleanup
+ by David MacKenzie <djm@ai.mit.edu>.
+
+ Options:
+ --bytes=byte-list
+ -b byte-list Print only the bytes in positions listed
+ in BYTE-LIST.
+ Tabs and backspaces are treated like any
+ other character; they take up 1 byte.
+
+ --characters=character-list
+ -c character-list Print only characters in positions listed
+ in CHARACTER-LIST.
+ The same as -b for now, but
+ internationalization will change that.
+ Tabs and backspaces are treated like any
+ other character; they take up 1 character.
+
+ --fields=field-list
+ -f field-list Print only the fields listed in FIELD-LIST.
+ Fields are separated by a TAB by default.
+
+ --delimiter=delim
+ -d delim For -f, fields are separated by the first
+ character in DELIM instead of TAB.
+
+ -n Do not split multibyte chars (no-op for now).
+
+ --only-delimited
+ -s For -f, do not print lines that do not contain
+ the field separator character.
+
+ The BYTE-LIST, CHARACTER-LIST, and FIELD-LIST are one or more numbers
+ or ranges separated by commas. The first byte, character, and field
+ are numbered 1.
+
+ A FILE of `-' means standard input. */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+#ifdef isascii
+#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
+#else
+#define ISDIGIT(c) (isdigit ((c)))
+#endif
+
+/* Old-style declarations (no parameter lists) of the functions used
+ in this file. `xmalloc', `xrealloc' and `error' are not defined
+ here; the others are defined below. */
+char *xmalloc ();
+char *xrealloc ();
+int set_fields ();
+int cut_file ();
+void cut_stream ();
+void cut_bytes ();
+void cut_fields ();
+void enlarge_line ();
+void error ();
+void invalid_list ();
+void usage ();
+
+/* The number of elements allocated for the input line
+ and the byte or field number: `outbuf', `inbuf' and `fields'
+ are all allocated with this many elements.
+ Enlarged as necessary (see enlarge_line). */
+int line_size;
+
+/* Processed output buffer. */
+char *outbuf;
+
+/* Where to save next char to output. */
+char *outbufptr;
+
+/* Raw line buffer for field mode. */
+char *inbuf;
+
+/* Where to save next input char. */
+char *inbufptr;
+
+/* What can be done about a byte or field. */
+enum field_action
+{
+ field_omit,
+ field_output
+};
+
+/* In byte mode, which bytes to output.
+ In field mode, which `delim'-separated fields to output.
+ Both bytes and fields are numbered starting with 1,
+ so the first element of `fields' is unused.
+ main initializes every element to `field_omit'. */
+enum field_action *fields;
+
+enum operating_mode
+{
+ /* Neither -b/-c nor -f has been seen yet. */
+ undefined_mode,
+
+ /* Output characters that are in the given bytes. */
+ byte_mode,
+
+ /* Output the given delimiter-separated fields. */
+ field_mode
+};
+
+enum operating_mode operating_mode;
+
+/* If nonzero,
+ for field mode, do not output lines containing no delimiter characters. */
+int delimited_lines_only;
+
+/* The delimiter character for field mode.
+ Stays '\0' until -d is given; main then falls back to TAB. */
+unsigned char delim;
+
+/* Nonzero if we have ever read standard input. */
+int have_read_stdin;
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Long option names; the last member of each entry is the short
+ option character that main's switch handles. */
+struct option longopts[] =
+{
+ {"bytes", 1, 0, 'b'},
+ {"characters", 1, 0, 'c'},
+ {"fields", 1, 0, 'f'},
+ {"delimiter", 1, 0, 'd'},
+ {"only-delimited", 0, 0, 's'},
+ {0, 0, 0, 0}
+};
+
+/* Parse the options, build the selection table in `fields', then cut
+ each file operand in turn (standard input if there are none).
+ The exit status is the OR of the cut_file results (0 or 1);
+ bad usage goes through `usage', which exits with status 2. */
+void
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ int optc, exit_status = 0;
+
+ program_name = argv[0];
+
+ line_size = 512;
+ operating_mode = undefined_mode;
+ delimited_lines_only = 0;
+ delim = '\0';
+ have_read_stdin = 0;
+
+ /* All three buffers start with `line_size' elements;
+ enlarge_line grows them together. */
+ fields = (enum field_action *)
+ xmalloc (line_size * sizeof (enum field_action));
+ outbuf = (char *) xmalloc (line_size);
+ inbuf = (char *) xmalloc (line_size);
+
+ /* `optc' is borrowed as a loop index here, before option parsing. */
+ for (optc = 0; optc < line_size; optc++)
+ fields[optc] = field_omit;
+
+ while ((optc = getopt_long (argc, argv, "b:c:d:f:ns", longopts, (int *) 0))
+ != EOF)
+ {
+ switch (optc)
+ {
+ case 'b':
+ case 'c':
+ /* Build the byte list. */
+ /* Only one of -b, -c and -f may be given, and only once. */
+ if (operating_mode != undefined_mode)
+ usage ();
+ operating_mode = byte_mode;
+ if (set_fields (optarg) == 0)
+ error (2, 0, "no fields given");
+ break;
+
+ case 'f':
+ /* Build the field list. */
+ if (operating_mode != undefined_mode)
+ usage ();
+ operating_mode = field_mode;
+ if (set_fields (optarg) == 0)
+ error (2, 0, "no fields given");
+ break;
+
+ case 'd':
+ /* New delimiter. */
+ if (optarg[0] == '\0')
+ error (2, 0, "no delimiter given");
+ if (optarg[1] != '\0')
+ error (2, 0, "delimiter must be a single character");
+ delim = optarg[0];
+ break;
+
+ case 'n':
+ /* Accepted but has no effect. */
+ break;
+
+ case 's':
+ delimited_lines_only++;
+ break;
+
+ default:
+ usage ();
+ }
+ }
+
+ if (operating_mode == undefined_mode)
+ usage ();
+
+ /* -s and -d are accepted only together with -f. */
+ if ((delimited_lines_only || delim != '\0') && operating_mode != field_mode)
+ usage ();
+
+ if (delim == '\0')
+ delim = '\t';
+
+ if (optind == argc)
+ exit_status |= cut_file ("-");
+ else
+ for (; optind < argc; optind++)
+ exit_status |= cut_file (argv[optind]);
+
+ /* cut_file leaves standard input open, so close it once here. */
+ if (have_read_stdin && fclose (stdin) == EOF)
+ {
+ error (0, errno, "-");
+ exit_status = 1;
+ }
+ if (ferror (stdout) || fclose (stdout) == EOF)
+ error (1, 0, "write error");
+
+ exit (exit_status);
+}
+
+/* Select for printing the positions in `fields' that are listed in
+   byte or field specification FIELDSTR.  FIELDSTR should be
+   composed of one or more numbers or ranges of numbers, separated by
+   blanks or commas.  Incomplete ranges may be given: `-m' means
+   `1-m'; `n-' means `n' through end of line or last field.
+
+   When several `n-' ranges are given, the one that starts earliest
+   wins, so `2-,5-' selects everything from position 2 onward.
+
+   A malformed FIELDSTR is reported through invalid_list.
+
+   Return a count of the list entries that selected positions;
+   zero means that nothing was selected. */
+
+int
+set_fields (fieldstr)
+     char *fieldstr;
+{
+  int initial = 1;		/* Value of first number in a range. */
+  int dash_found = 0;		/* Nonzero if a '-' is found in this field. */
+  int value = 0;		/* If nonzero, a number being accumulated. */
+  int fields_selected = 0;	/* Number of fields selected so far. */
+  /* If nonzero, index of first field in a range that goes to end of line. */
+  int eol_range_start = 0;
+
+  for (;;)
+    {
+      if (*fieldstr == '-')
+	{
+	  /* Starting a range. */
+	  if (dash_found)
+	    invalid_list ();
+	  dash_found++;
+	  fieldstr++;
+
+	  if (value)
+	    {
+	      /* `fields' must be able to hold index `value'. */
+	      if (value >= line_size)
+		enlarge_line (value);
+	      initial = value;
+	      value = 0;
+	    }
+	  else
+	    initial = 1;
+	}
+      else if (*fieldstr == ',' || isblank (*fieldstr) || *fieldstr == '\0')
+	{
+	  /* Ending the string, or this field/byte sublist. */
+	  if (dash_found)
+	    {
+	      dash_found = 0;
+
+	      /* A range.  Possibilities: -n, m-n, n-.
+		 In any case, `initial' contains the start of the range. */
+	      if (value == 0)
+		{
+		  /* `n-'.  From `initial' to end of line.
+		     Keep the earliest start seen so far: a later,
+		     higher `n-' must not shrink an earlier open range. */
+		  if (eol_range_start == 0 || initial < eol_range_start)
+		    eol_range_start = initial;
+		  fields_selected++;
+		}
+	      else
+		{
+		  /* `m-n' or `-n' (1-n). */
+		  if (value < initial)
+		    invalid_list ();
+
+		  if (value >= line_size)
+		    enlarge_line (value);
+
+		  /* Is there already a range going to end of line? */
+		  if (eol_range_start != 0)
+		    {
+		      /* Yes.  Is the new sequence already contained
+			 in the old one?  If so, no processing is
+			 necessary. */
+		      if (initial < eol_range_start)
+			{
+			  /* No, the new sequence starts before the
+			     old.  Does the old range going to end of line
+			     extend into the new range?  */
+			  if (eol_range_start < value)
+			    /* Yes.  Simply move the end of line marker. */
+			    eol_range_start = initial;
+			  else
+			    {
+			      /* No.  A simple range, before and disjoint from
+				 the range going to end of line.  Fill it. */
+			      for (; initial <= value; initial++)
+				fields[initial] = field_output;
+			    }
+
+			  /* In any case, some fields were selected. */
+			  fields_selected++;
+			}
+		    }
+		  else
+		    {
+		      /* There is no range going to end of line. */
+		      for (; initial <= value; initial++)
+			fields[initial] = field_output;
+		      fields_selected++;
+		    }
+		  value = 0;
+		}
+	    }
+	  else if (value != 0)
+	    {
+	      /* A simple field number, not a range. */
+	      if (value >= line_size)
+		enlarge_line (value);
+
+	      fields[value] = field_output;
+	      value = 0;
+	      fields_selected++;
+	    }
+
+	  if (*fieldstr == '\0')
+	    {
+	      /* If there was a range going to end of line, fill the
+		 array from the end of line point. */
+	      if (eol_range_start)
+		for (initial = eol_range_start; initial < line_size; initial++)
+		  fields[initial] = field_output;
+
+	      return fields_selected;
+	    }
+
+	  fieldstr++;
+	}
+      else if (ISDIGIT (*fieldstr))
+	{
+	  value = 10 * value + *fieldstr - '0';
+	  fieldstr++;
+	}
+      else
+	invalid_list ();
+    }
+}
+
+/* Process file FILE to standard output; a FILE of "-" means
+   standard input.  Problems are reported through `error'.
+   Return 0 if successful, 1 if not. */
+
+int
+cut_file (file)
+     char *file;
+{
+  FILE *stream;
+  int is_stdin = !strcmp (file, "-");
+
+  if (is_stdin)
+    {
+      have_read_stdin = 1;
+      stream = stdin;
+    }
+  else
+    {
+      stream = fopen (file, "r");
+      if (stream == NULL)
+	{
+	  error (0, errno, "%s", file);
+	  return 1;
+	}
+    }
+
+  cut_stream (stream);
+
+  if (ferror (stream))
+    {
+      error (0, errno, "%s", file);
+      /* Do not leak a stream we opened ourselves.  Standard input
+	 is left open; main closes it once at the end. */
+      if (!is_stdin)
+	fclose (stream);
+      return 1;
+    }
+  if (is_stdin)
+    clearerr (stream);		/* Also clear EOF. */
+  else if (fclose (stream) == EOF)
+    {
+      error (0, errno, "%s", file);
+      return 1;
+    }
+  return 0;
+}
+
+/* Run the cutter selected by `operating_mode' over STREAM:
+   cut_bytes in byte mode, cut_fields otherwise. */
+
+void
+cut_stream (stream)
+     FILE *stream;
+{
+  void (*cutter) () = operating_mode == byte_mode ? cut_bytes : cut_fields;
+
+  (*cutter) (stream);
+}
+
+/* Copy STREAM to standard output one line at a time, keeping only
+   the bytes whose position is marked `field_output' in `fields'.
+   A newline is always kept. */
+
+void
+cut_bytes (stream)
+     FILE *stream;
+{
+  int at_eof = 0;		/* Set once getc has returned EOF. */
+
+  while (!at_eof)
+    {
+      int position = 0;		/* 1-based index of the byte just read. */
+
+      /* Begin a fresh output line. */
+      outbufptr = outbuf;
+
+      for (;;)
+	{
+	  int ch = getc (stream);
+
+	  if (ch == EOF)
+	    {
+	      at_eof = 1;
+	      break;
+	    }
+
+	  /* Grow the tables before the index reaches their end. */
+	  position++;
+	  if (position == line_size - 1)
+	    enlarge_line (position);
+
+	  if (ch == '\n' || fields[position] == field_output)
+	    *outbufptr++ = ch;
+
+	  if (ch == '\n')
+	    break;
+	}
+
+      if (position != 0)
+	fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
+    }
+}
+
+/* Print the file open for reading on stream STREAM
+ with the fields marked `field_omit' in `fields' removed from each line.
+ All characters are initially stowed in the raw input buffer, until
+ at least one field has been found.
+ The raw buffer is what gets printed for a line on which no selected
+ field was found, unless `delimited_lines_only' is set. */
+
+void
+cut_fields (stream)
+ FILE *stream;
+{
+ register int c; /* Each character from the file. */
+ int doneflag = 0; /* Nonzero if EOF reached. */
+ int char_count; /* Number of chars in line before any delim. */
+ int fieldfound; /* Nonzero if any fields to print found. */
+ int curr_field; /* Current index in `fields'. */
+
+ while (doneflag == 0)
+ {
+ /* Reset the per-line state. */
+ char_count = 0;
+ fieldfound = 0;
+ curr_field = 1;
+ outbufptr = outbuf;
+ inbufptr = inbuf;
+
+ do
+ {
+ c = getc (stream);
+ if (c == EOF)
+ {
+ doneflag++;
+ break;
+ }
+
+ if (fields[curr_field] == field_output && c != '\n')
+ {
+ /* Working on a field. It, and its terminating
+ delimiter, go only into the processed buffer. */
+ fieldfound = 1;
+ /* Grow the buffers before `outbuf' fills up. */
+ if (outbufptr - outbuf == line_size - 2)
+ enlarge_line (outbufptr - outbuf);
+ *outbufptr++ = c;
+ }
+ else if (fieldfound == 0)
+ {
+ /* No selected field seen yet: keep the raw character. */
+ if (++char_count == line_size - 1)
+ enlarge_line (char_count);
+ *inbufptr++ = c;
+ }
+
+ /* A delimiter ends the current field; also keep `fields'
+ large enough to be indexed by the new field number. */
+ if (c == delim && ++curr_field == line_size - 1)
+ enlarge_line (curr_field);
+ }
+ while (c != '\n');
+
+ if (fieldfound)
+ {
+ /* Something was found. Print it. */
+ /* NOTE(review): `outbufptr[-1]' is a plain char while `delim'
+ is unsigned char; where char is signed, a delimiter above
+ 0x7f would presumably never compare equal here -- confirm. */
+ if (outbufptr[-1] == delim)
+ --outbufptr; /* Suppress trailing delimiter. */
+
+ fwrite (outbuf, sizeof (char), outbufptr - outbuf, stdout);
+ if (c == '\n')
+ putc (c, stdout);
+ }
+ else if (!delimited_lines_only && char_count)
+ /* A line with some characters, no delimiters, and no
+ suppression. Print it. */
+ /* NOTE(review): this branch is also taken for a line that does
+ contain delimiters but none of the selected fields, since
+ `fieldfound' is set only while inside a selected field. */
+ fwrite (inbuf, sizeof (char), inbufptr - inbuf, stdout);
+ }
+}
+
+/* Extend the buffers to accommodate at least NEW_SIZE characters.
+   `fields', `outbuf' and `inbuf' are all resized to NEW_SIZE + 256
+   elements, and `line_size' is updated to match.  The cursors
+   `outbufptr' and `inbufptr' keep their offsets within the buffers;
+   a cursor that has not been set yet (still null) is left pointing
+   at the start of its buffer.  New entries of `fields' are set to
+   `field_omit'. */
+
+void
+enlarge_line (new_size)
+     int new_size;
+{
+  /* Record the cursor offsets before the buffers move.  Once
+     xrealloc has returned, the old buffer addresses must not be
+     used any more, not even for pointer arithmetic. */
+  int out_offset = outbufptr ? outbufptr - outbuf : 0;
+  int in_offset = inbufptr ? inbufptr - inbuf : 0;
+  int i;
+
+  new_size += 256;		/* Leave some room to grow. */
+
+  fields = (enum field_action *)
+    xrealloc (fields, new_size * sizeof (enum field_action));
+
+  outbuf = (char *) xrealloc (outbuf, new_size);
+  outbufptr = outbuf + out_offset;
+
+  inbuf = (char *) xrealloc (inbuf, new_size);
+  inbufptr = inbuf + in_offset;
+
+  for (i = line_size; i < new_size; i++)
+    fields[i] = field_omit;
+  line_size = new_size;
+}
+
+/* Report a malformed byte or field list by passing status 2 and a
+ fixed message to `error'.
+ NOTE(review): presumably `error' does not return when given a
+ nonzero status, as set_fields appears to rely on -- confirm. */
+void
+invalid_list ()
+{
+ error (2, 0, "invalid byte or field list");
+}
+
+/* Print the three accepted command forms on standard error,
+   then terminate with exit status 2. */
+
+void
+usage ()
+{
+  char *synopsis = "\
+Usage: %s {-b byte-list,--bytes=byte-list} [-n] [file...]\n\
+ %s {-c character-list,--characters=character-list} [file...]\n\
+ %s {-f field-list,--fields=field-list} [-d delim] [-s]\n\
+ [--delimiter=delim] [--only-delimited] [file...]\n";
+
+  fprintf (stderr, synopsis, program_name, program_name, program_name);
+  exit (2);
+}
diff --git a/src/expand.c b/src/expand.c
new file mode 100644
index 000000000..8e471379e
--- /dev/null
+++ b/src/expand.c
@@ -0,0 +1,377 @@
+/* expand - convert tabs to spaces
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* By default, convert all tabs to spaces.
+ Preserves backspace characters in the output; they decrement the
+ column count for tab calculations.
+ The default action is equivalent to -8.
+
+ Options:
+ --tabs=tab1[,tab2[,...]]
+ -t tab1[,tab2[,...]]
+ -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
+ spaces apart instead of the default 8. Otherwise,
+ set the tabs at columns tab1, tab2, etc. (numbered from
+ 0); replace any tabs beyond the tabstops given with
+ single spaces.
+ --initial
+ -i Only convert initial tabs on each line to spaces.
+
+ David MacKenzie <djm@ai.mit.edu> */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+#ifdef isascii
+#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
+#else
+#define ISDIGIT(c) (isdigit ((c)))
+#endif
+
+/* The number of bytes added at a time to the amount of memory
+ allocated for the output line. */
+#define OUTPUT_BLOCK 256
+
+/* The number of bytes added at a time to the amount of memory
+ allocated for the list of tabstops. */
+#define TABLIST_BLOCK 256
+
+/* Old-style declarations (no parameter lists). `xmalloc', `xrealloc'
+ and `error' are not defined in this file. */
+char *xmalloc ();
+char *xrealloc ();
+void error ();
+
+/* Functions defined below. */
+FILE *next_file ();
+void add_tabstop ();
+void expand ();
+void parse_tabstops ();
+void usage ();
+void validate_tabstops ();
+
+/* If nonzero, convert blanks even after nonblank characters have been
+ read on the line. Cleared by -i. */
+int convert_entire_line;
+
+/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead.
+ main sets it to 8 when no tab stop was given, to the single value
+ when exactly one was given, and to 0 when several were given. */
+int tab_size;
+
+/* Array of the explicit column numbers of the tab stops;
+ after `tab_list' is exhausted, each additional tab is replaced
+ by a space. The first column is column 0. */
+int *tab_list;
+
+/* The index of the first invalid element of `tab_list',
+ where the next element can be added. */
+int first_free_tab;
+
+/* Null-terminated array of input filenames.
+ next_file advances it as files are consumed. */
+char **file_list;
+
+/* Default for `file_list' if no files are given on the command line. */
+char *stdin_argv[] =
+{
+ "-", NULL
+};
+
+/* Nonzero if we have ever read standard input. */
+int have_read_stdin;
+
+/* Status to return to the system. */
+int exit_status;
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Long option names; the last member of each entry is the short
+ option character that main's switch handles. */
+struct option longopts[] =
+{
+ {"tabs", 1, NULL, 't'},
+ {"initial", 0, NULL, 'i'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Parse the options, settle on the tab stops, then expand every
+ input file to standard output. Exits with `exit_status'. */
+void
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ int tabval = -1; /* Value of tabstop being read, or -1. */
+ int c; /* Option character. */
+
+ have_read_stdin = 0;
+ exit_status = 0;
+ convert_entire_line = 1;
+ tab_list = NULL;
+ first_free_tab = 0;
+ program_name = argv[0];
+
+ /* Digits and `,' are listed as option characters so that the old
+ `-tab1[,tab2...]' form arrives here one character at a time. */
+ while ((c = getopt_long (argc, argv, "it:,0123456789", longopts, (int *) 0))
+ != EOF)
+ {
+ switch (c)
+ {
+ case '?':
+ /* usage calls exit, so control does not fall into the next case. */
+ usage ();
+ case 'i':
+ convert_entire_line = 0;
+ break;
+ case 't':
+ parse_tabstops (optarg);
+ break;
+ case ',':
+ /* End of one old-style tab stop. */
+ add_tabstop (tabval);
+ tabval = -1;
+ break;
+ default:
+ /* A digit of an old-style tab stop. */
+ if (tabval == -1)
+ tabval = 0;
+ tabval = tabval * 10 + c - '0';
+ break;
+ }
+ }
+
+ /* Store a trailing old-style tab stop; a no-op if tabval is -1. */
+ add_tabstop (tabval);
+
+ validate_tabstops (tab_list, first_free_tab);
+
+ if (first_free_tab == 0)
+ tab_size = 8;
+ else if (first_free_tab == 1)
+ tab_size = tab_list[0];
+ else
+ tab_size = 0;
+
+ if (optind == argc)
+ file_list = stdin_argv;
+ else
+ file_list = &argv[optind];
+
+ expand ();
+
+ if (have_read_stdin && fclose (stdin) == EOF)
+ error (1, errno, "-");
+ if (ferror (stdout) || fclose (stdout) == EOF)
+ error (1, 0, "write error");
+
+ exit (exit_status);
+}
+
+/* Parse STOPS, a list of decimal tab stops separated by commas or
+   blanks, and append each one to the list of tabstops with
+   add_tabstop.  Any other character is reported with `error'. */
+
+void
+parse_tabstops (stops)
+     char *stops;
+{
+  int pending = -1;		/* Tab stop being accumulated, or -1. */
+  char *p = stops;
+
+  while (*p != '\0')
+    {
+      char ch = *p++;
+
+      if (ISDIGIT (ch))
+	pending = (pending == -1 ? 0 : pending) * 10 + ch - '0';
+      else if (ch == ',' || isblank (ch))
+	{
+	  /* A separator ends the current number, if there is one. */
+	  add_tabstop (pending);
+	  pending = -1;
+	}
+      else
+	error (1, 0, "tab size contains an invalid character");
+    }
+
+  add_tabstop (pending);
+}
+
+/* Add tab stop TABVAL to the end of `tab_list', except
+   if TABVAL is -1, do nothing.
+   `tab_list' is enlarged by TABLIST_BLOCK entries each time it
+   becomes full. */
+
+void
+add_tabstop (tabval)
+     int tabval;
+{
+  if (tabval == -1)
+    return;
+  if (first_free_tab % TABLIST_BLOCK == 0)
+    /* The size passed to xrealloc is a byte count, so the number of
+       entries has to be scaled by the size of one entry. */
+    tab_list = (int *) xrealloc (tab_list,
+				 (first_free_tab + TABLIST_BLOCK)
+				 * sizeof (tab_list[0]));
+  tab_list[first_free_tab++] = tabval;
+}
+
+/* Verify that the ENTRIES tab stops in TABS are all nonzero and
+   strictly increasing; report the first violation with `error'. */
+
+void
+validate_tabstops (tabs, entries)
+     int *tabs;
+     int entries;
+{
+  int previous = 0;		/* Tab stop checked just before this one. */
+  int remaining;
+
+  for (remaining = entries; remaining > 0; remaining--, tabs++)
+    {
+      int stop = *tabs;
+
+      if (stop == 0)
+	error (1, 0, "tab size cannot be 0");
+      if (stop <= previous)
+	error (1, 0, "tab sizes must be ascending");
+      previous = stop;
+    }
+}
+
+/* Change tabs to spaces, writing to stdout.
+ Read each file in `file_list', in order.
+ The column and tab stop state is reset only when a newline is read,
+ so it carries over from the end of one file to the start of the next. */
+
+void
+expand ()
+{
+ FILE *fp; /* Input stream. */
+ int c; /* Each input character. */
+ int tab_index = 0; /* Index in `tab_list' of next tabstop. */
+ int column = 0; /* Column on screen of the next char. */
+ int next_tab_column; /* Column the next tab stop is on. */
+ int convert = 1; /* If nonzero, perform translations. */
+
+ fp = next_file ((FILE *) NULL);
+ /* NOTE(review): if no input file can be opened, next_file returns
+ NULL and the getc below is handed a null stream -- confirm. */
+ for (;;)
+ {
+ c = getc (fp);
+ if (c == EOF)
+ {
+ fp = next_file (fp);
+ if (fp == NULL)
+ break; /* No more files. */
+ else
+ continue;
+ }
+
+ if (c == '\n')
+ {
+ /* A new line starts again at column 0 with conversion on. */
+ putchar (c);
+ tab_index = 0;
+ column = 0;
+ convert = 1;
+ }
+ else if (c == '\t' && convert)
+ {
+ if (tab_size == 0)
+ {
+ /* Explicit tab stops: find the first one beyond `column'. */
+ /* Do not let tab_index == first_free_tab;
+ stop when it is 1 less. */
+ while (tab_index < first_free_tab - 1
+ && column >= tab_list[tab_index])
+ tab_index++;
+ next_tab_column = tab_list[tab_index];
+ if (tab_index < first_free_tab - 1)
+ tab_index++;
+ if (column >= next_tab_column)
+ next_tab_column = column + 1; /* Ran out of tab stops. */
+ }
+ else
+ {
+ /* Evenly spaced tab stops: the next multiple of tab_size. */
+ next_tab_column = column + tab_size - column % tab_size;
+ }
+ /* Pad with spaces up to the chosen column. */
+ while (column < next_tab_column)
+ {
+ putchar (' ');
+ ++column;
+ }
+ }
+ else
+ {
+ /* The column is tracked only while tabs are still converted. */
+ if (convert)
+ {
+ if (c == '\b')
+ {
+ if (column > 0)
+ --column;
+ }
+ else
+ {
+ ++column;
+ /* With -i, any character other than a tab or a backspace
+ (a space included) ends conversion for this line. */
+ if (convert_entire_line == 0)
+ convert = 0;
+ }
+ }
+ putchar (c);
+ }
+ }
+}
+
+/* Finish with stream FP, unless it is NULL, and open the next input
+   file named in `file_list'.  A read or close failure on FP is
+   reported and recorded in `exit_status'; standard input is not
+   closed, only its error and EOF flags are cleared.
+   A filename of `-' stands for standard input.  Names that cannot
+   be opened are reported and skipped.
+   Return the new stream, or NULL when no input files remain. */
+
+FILE *
+next_file (fp)
+     FILE *fp;
+{
+  static char *prev_file;	/* Name that FP was opened under. */
+  char *file;
+
+  if (fp != NULL)
+    {
+      if (ferror (fp))
+	{
+	  error (0, errno, "%s", prev_file);
+	  exit_status = 1;
+	}
+      if (fp == stdin)
+	clearerr (fp);		/* Also clear EOF. */
+      else if (fclose (fp) == EOF)
+	{
+	  error (0, errno, "%s", prev_file);
+	  exit_status = 1;
+	}
+    }
+
+  for (;;)
+    {
+      FILE *candidate;
+
+      file = *file_list++;
+      if (file == NULL)
+	return NULL;
+
+      if (file[0] == '-' && file[1] == '\0')
+	{
+	  have_read_stdin = 1;
+	  candidate = stdin;
+	}
+      else
+	candidate = fopen (file, "r");
+
+      if (candidate != NULL)
+	{
+	  prev_file = file;
+	  return candidate;
+	}
+
+      error (0, errno, "%s", file);
+      exit_status = 1;
+    }
+}
+
+/* Print a summary of the command-line syntax on standard error,
+   then terminate with exit status 1. */
+
+void
+usage ()
+{
+  char *synopsis = "\
+Usage: %s [-tab1[,tab2[,...]]] [-t tab1[,tab2[,...]]] [-i]\n\
+ [--tabs=tab1[,tab2[,...]]] [--initial] [file...]\n";
+
+  fprintf (stderr, synopsis, program_name);
+  exit (1);
+}
diff --git a/src/fold.c b/src/fold.c
new file mode 100644
index 000000000..d5d4ae3fe
--- /dev/null
+++ b/src/fold.c
@@ -0,0 +1,250 @@
+/* fold -- wrap each input line to fit in specified width.
+ Copyright (C) 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by David MacKenzie. */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+/* Old-style declarations (no parameter lists). `xrealloc' and `error'
+ are not defined in this file; the other two are defined below. */
+char *xrealloc ();
+int adjust_column ();
+int fold_file ();
+void error ();
+
+/* If nonzero, try to break on whitespace. */
+int break_spaces;
+
+/* If nonzero, count bytes, not column positions. */
+int count_bytes;
+
+/* If nonzero, at least one of the files we read was standard input. */
+int have_read_stdin;
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Long option names; the last member of each entry is the short
+ option character that main's switch handles. */
+struct option longopts[] =
+{
+ {"bytes", 0, NULL, 'b'},
+ {"spaces", 0, NULL, 's'},
+ {"width", 1, NULL, 'w'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Parse the options, then fold each file operand (standard input if
+ there are none) to standard output. The exit status is the OR of
+ the fold_file results (0 or 1). */
+void
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ int width = 80;
+ int i;
+ int optc;
+ int errs = 0;
+
+ program_name = argv[0];
+ break_spaces = count_bytes = have_read_stdin = 0;
+
+ while ((optc = getopt_long (argc, argv, "bsw:", longopts, (int *) 0))
+ != EOF)
+ {
+ switch (optc)
+ {
+ case 'b': /* Count bytes rather than columns. */
+ count_bytes = 1;
+ break;
+
+ case 's': /* Break at word boundaries. */
+ break_spaces = 1;
+ break;
+
+ case 'w': /* Line width. */
+ /* NOTE(review): atoi stops at the first non-digit, so an
+ argument such as `12x' is accepted as 12. */
+ width = atoi (optarg);
+ if (width < 1)
+ error (1, 0, "%s: invalid line width", optarg);
+ break;
+
+ default:
+ fprintf (stderr, "\
+Usage: %s [-bs] [-w width] [--bytes] [--spaces] [--width=width] [file...]\n",
+ argv[0]);
+ exit (1);
+ }
+ }
+
+ if (argc == optind)
+ errs |= fold_file ("-", width);
+ else
+ for (i = optind; i < argc; i++)
+ errs |= fold_file (argv[i], width);
+
+ /* fold_file leaves standard input open, so close it once here. */
+ if (have_read_stdin && fclose (stdin) == EOF)
+ error (1, errno, "-");
+ if (fclose (stdout) == EOF)
+ error (1, errno, "write error");
+
+ exit (errs);
+}
+
+/* Fold file FILENAME, or standard input if FILENAME is "-",
+   to stdout, with maximum line length WIDTH.
+   With `break_spaces' set, a line is broken after its last blank
+   when it has one.
+   A single character that is wider than WIDTH on its own (for
+   example a tab when WIDTH is less than 8) is placed at the start
+   of a line as it is, since breaking before it cannot make it fit.
+   Return 0 if successful, 1 if an error occurs. */
+
+int
+fold_file (filename, width)
+     char *filename;
+     int width;
+{
+  FILE *istream;
+  register int c;
+  int column = 0;		/* Screen column where next char will go. */
+  int offset_out = 0;		/* Index in `line_out' for next char. */
+  /* The line buffer is kept and reused from one call to the next. */
+  static char *line_out = NULL;
+  static size_t allocated_out = 0;
+
+  if (!strcmp (filename, "-"))
+    {
+      istream = stdin;
+      have_read_stdin = 1;
+    }
+  else
+    istream = fopen (filename, "r");
+
+  if (istream == NULL)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  while ((c = getc (istream)) != EOF)
+    {
+      /* Keep room for this character plus a newline. */
+      if (offset_out + 1 >= allocated_out)
+	{
+	  allocated_out += 1024;
+	  line_out = xrealloc (line_out, allocated_out);
+	}
+
+      if (c == '\n')
+	{
+	  line_out[offset_out++] = c;
+	  fwrite (line_out, sizeof (char), offset_out, stdout);
+	  column = offset_out = 0;
+	  continue;
+	}
+
+    rescan:
+      column = adjust_column (column, c);
+
+      if (column > width)
+	{
+	  /* This character would make the line too long.
+	     Print the line plus a newline, and make this character
+	     start the next line. */
+	  if (break_spaces)
+	    {
+	      /* Look for the last blank. */
+	      int logical_end;
+
+	      for (logical_end = offset_out - 1; logical_end >= 0;
+		   logical_end--)
+		if (isblank (line_out[logical_end]))
+		  break;
+	      if (logical_end >= 0)
+		{
+		  int i;
+
+		  /* Found a blank.  Don't output the part after it. */
+		  logical_end++;
+		  fwrite (line_out, sizeof (char), logical_end, stdout);
+		  putchar ('\n');
+		  /* Move the remainder to the beginning of the next line.
+		     The areas being copied here might overlap. */
+		  bcopy (line_out + logical_end, line_out,
+			 offset_out - logical_end);
+		  offset_out -= logical_end;
+		  for (column = i = 0; i < offset_out; i++)
+		    column = adjust_column (column, line_out[i]);
+		  goto rescan;
+		}
+	    }
+
+	  if (offset_out == 0)
+	    {
+	      /* The line is empty, so this character alone is wider
+		 than WIDTH.  Emitting a newline and rescanning would
+		 repeat forever; accept the character instead and let
+		 the next one trigger the line break. */
+	      line_out[offset_out++] = c;
+	      continue;
+	    }
+
+	  line_out[offset_out++] = '\n';
+	  fwrite (line_out, sizeof (char), offset_out, stdout);
+	  column = offset_out = 0;
+	  goto rescan;
+	}
+
+      line_out[offset_out++] = c;
+    }
+
+  /* Flush a final line that has no trailing newline. */
+  if (offset_out)
+    fwrite (line_out, sizeof (char), offset_out, stdout);
+
+  if (ferror (istream))
+    {
+      error (0, errno, "%s", filename);
+      if (strcmp (filename, "-"))
+	fclose (istream);
+      return 1;
+    }
+  if (strcmp (filename, "-") && fclose (istream) == EOF)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  if (ferror (stdout))
+    {
+      error (0, errno, "write error");
+      return 1;
+    }
+
+  return 0;
+}
+
+/* Return the column the cursor is in after C has been printed at
+   column COLUMN.  The first column is 0.
+   When `count_bytes' is set every character advances one column.
+   Otherwise a backspace moves back one column (but not before
+   column 0), a carriage return goes to column 0, a tab advances to
+   the next multiple of 8, and any other character advances one
+   column. */
+
+int
+adjust_column (column, c)
+     int column;
+     char c;
+{
+  if (count_bytes)
+    return column + 1;
+
+  switch (c)
+    {
+    case '\b':
+      return column > 0 ? column - 1 : column;
+
+    case '\r':
+      return 0;
+
+    case '\t':
+      return column + 8 - column % 8;
+
+    default:
+      return column + 1;
+    }
+}
diff --git a/src/head.c b/src/head.c
new file mode 100644
index 000000000..0302b60ac
--- /dev/null
+++ b/src/head.c
@@ -0,0 +1,380 @@
+/* head -- output first part of file(s)
+ Copyright (C) 1989, 1990, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Options:
+ -b Print first N 512-byte blocks.
+ -c, --bytes=N[bkm] Print first N bytes
+ [or 512-byte blocks, kilobytes, or megabytes].
+ -k Print first N kilobytes.
+ -N, -l, -n, --lines=N Print first N lines.
+ -m Print first N megabytes.
+ -q, --quiet, --silent Never print filename headers.
+ -v, --verbose Always print filename headers.
+
+ Reads from standard input if no files are given or when a filename of
+ ``-'' is encountered.
+ By default, filename headers are printed only if more than one file
+ is given.
+ By default, prints the first 10 lines (head -n 10).
+
+ David MacKenzie <djm@ai.mit.edu> */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include "system.h"
+
+#ifdef isascii
+#define ISDIGIT(c) (isascii ((c)) && isdigit ((c)))
+#else
+#define ISDIGIT(c) (isdigit ((c)))
+#endif
+
+/* Number of lines/chars/blocks to head. */
+#define DEFAULT_NUMBER 10
+
+/* Size of atomic reads. */
+#define BUFSIZE (512 * 8)
+
+/* Number of bytes per item we are printing.
+ If 0, head in lines.
+ main multiplies the requested count by this when it exceeds 1. */
+int unit_size;
+
+/* If nonzero, print filename headers. */
+int print_headers;
+
+/* When to print the filename banners. */
+enum header_mode
+{
+ multiple_files, always, never
+};
+
+/* Old-style declarations (no parameter lists). `error' and `xwrite'
+ are not defined in this file; the others are defined below. */
+int head ();
+int head_bytes ();
+int head_file ();
+int head_lines ();
+long atou ();
+void error ();
+void parse_unit ();
+void usage ();
+void write_header ();
+void xwrite ();
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Have we ever read standard input? */
+int have_read_stdin;
+
+/* Long option names; the last member of each entry is the short
+ option character that main's switch handles. */
+struct option long_options[] =
+{
+ {"bytes", 1, NULL, 'c'},
+ {"lines", 1, NULL, 'n'},
+ {"quiet", 0, NULL, 'q'},
+ {"silent", 0, NULL, 'q'},
+ {"verbose", 0, NULL, 'v'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Parse the options (the old `-N[bcklmqv]' form first, then the
+ getopt ones), then print the start of each file operand, or of
+ standard input if there are none. The exit status is the OR of
+ the head_file results (0 or 1). */
+void
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ enum header_mode header_mode = multiple_files;
+ int exit_status = 0;
+ long number = -1; /* Number of items to print (-1 if undef.). */
+ int c; /* Option character. */
+
+ program_name = argv[0];
+ have_read_stdin = 0;
+ unit_size = 0;
+ print_headers = 0;
+
+ /* The old syntax is recognized only in the first argument. */
+ if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
+ {
+ /* Old option syntax; a dash, one or more digits, and one or
+ more option letters. Move past the number. */
+ for (number = 0, ++argv[1]; ISDIGIT (*argv[1]); ++argv[1])
+ number = number * 10 + *argv[1] - '0';
+ /* Parse any appended option letters. */
+ while (*argv[1])
+ {
+ switch (*argv[1])
+ {
+ case 'b':
+ unit_size = 512;
+ break;
+
+ case 'c':
+ unit_size = 1;
+ break;
+
+ case 'k':
+ unit_size = 1024;
+ break;
+
+ case 'l':
+ unit_size = 0;
+ break;
+
+ case 'm':
+ unit_size = 1048576;
+ break;
+
+ case 'q':
+ header_mode = never;
+ break;
+
+ case 'v':
+ header_mode = always;
+ break;
+
+ default:
+ error (0, 0, "unrecognized option `-%c'", *argv[1]);
+ usage ();
+ }
+ ++argv[1];
+ }
+ /* Make the options we just parsed invisible to getopt. */
+ argv[1] = argv[0];
+ argv++;
+ argc--;
+ }
+
+ while ((c = getopt_long (argc, argv, "c:n:qv", long_options, (int *) 0))
+ != EOF)
+ {
+ switch (c)
+ {
+ case 'c':
+ unit_size = 1;
+ /* parse_unit strips a trailing b, k or m from optarg and
+ sets unit_size accordingly, leaving only the digits. */
+ parse_unit (optarg);
+ goto getnum;
+ case 'n':
+ unit_size = 0;
+ getnum:
+ number = atou (optarg);
+ if (number == -1)
+ error (1, 0, "invalid number `%s'", optarg);
+ break;
+
+ case 'q':
+ header_mode = never;
+ break;
+
+ case 'v':
+ header_mode = always;
+ break;
+
+ default:
+ usage ();
+ }
+ }
+
+ if (number == -1)
+ number = DEFAULT_NUMBER;
+
+ /* Convert a count of blocks, kilobytes or megabytes into bytes.
+ NOTE(review): the product is not checked for overflow. */
+ if (unit_size > 1)
+ number *= unit_size;
+
+ /* `optind < argc - 1' means that more than one file operand remains. */
+ if (header_mode == always
+ || (header_mode == multiple_files && optind < argc - 1))
+ print_headers = 1;
+
+ if (optind == argc)
+ exit_status |= head_file ("-", number);
+
+ for (; optind < argc; ++optind)
+ exit_status |= head_file (argv[optind], number);
+
+ /* Descriptors 0 and 1 are used directly; this file does its reading
+ and writing with read and xwrite rather than stdio. */
+ if (have_read_stdin && close (0) < 0)
+ error (1, errno, "-");
+ if (close (1) < 0)
+ error (1, errno, "write error");
+
+ exit (exit_status);
+}
+
+/* Print the first NUMBER items of the file named FILENAME, or of
+   standard input if FILENAME is "-", preceded by a header when
+   `print_headers' is set.
+   Return 0 if successful, 1 if the file could not be opened, read
+   or closed. */
+
+int
+head_file (filename, number)
+     char *filename;
+     long number;
+{
+  int fd;
+  int status;
+
+  if (strcmp (filename, "-") == 0)
+    {
+      /* Standard input: descriptor 0, under a descriptive name. */
+      have_read_stdin = 1;
+      filename = "standard input";
+      if (print_headers)
+	write_header (filename);
+      return head (filename, 0, number);
+    }
+
+  fd = open (filename, O_RDONLY);
+  if (fd < 0)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  if (print_headers)
+    write_header (filename);
+  status = head (filename, fd, number);
+
+  if (close (fd) != 0)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+  return status;
+}
+
+/* Write the banner `==> FILENAME <==' to descriptor 1.
+   Every banner but the first is preceded by a blank line. */
+
+void
+write_header (filename)
+     char *filename;
+{
+  static int first_file = 1;
+  char *opener = "\n==> ";
+  /* Skip the leading newline of `opener' for the first banner. */
+  int skip = first_file ? 1 : 0;
+
+  first_file = 0;
+  xwrite (1, opener + skip, 5 - skip);
+  xwrite (1, filename, strlen (filename));
+  xwrite (1, " <==\n", 5);
+}
+
+/* Print the first NUMBER items read from descriptor FD, which was
+   opened from FILENAME: bytes if `unit_size' is nonzero, lines
+   otherwise.  Return the result of the function that does the
+   work. */
+
+int
+head (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  int (*printer) () = unit_size ? head_bytes : head_lines;
+
+  return (*printer) (filename, fd, number);
+}
+
+/* Copy at most BYTES_TO_WRITE bytes from descriptor FD to
+   descriptor 1, stopping early at end of file.  FILENAME is used
+   only in the error message.
+   Return 0 if successful, 1 after a read error. */
+
+int
+head_bytes (filename, fd, bytes_to_write)
+     char *filename;
+     int fd;
+     long bytes_to_write;
+{
+  char buffer[BUFSIZE];
+  long remaining = bytes_to_write;
+
+  while (remaining != 0)
+    {
+      int got = read (fd, buffer, BUFSIZE);
+
+      if (got == -1)
+	{
+	  error (0, errno, "%s", filename);
+	  return 1;
+	}
+      if (got == 0)
+	break;
+
+      /* Never write more than was asked for. */
+      if (got > remaining)
+	got = remaining;
+      xwrite (1, buffer, got);
+      remaining -= got;
+    }
+  return 0;
+}
+
+/* Copy the first LINES_TO_WRITE lines from descriptor FD to
+   descriptor 1, stopping early at end of file.  FILENAME is used
+   only in the error message.
+   Return 0 if successful, 1 after a read error. */
+
+int
+head_lines (filename, fd, lines_to_write)
+     char *filename;
+     int fd;
+     long lines_to_write;
+{
+  char buffer[BUFSIZE];
+
+  while (lines_to_write != 0)
+    {
+      int got = read (fd, buffer, BUFSIZE);
+      int used = 0;		/* Bytes of `buffer' to write out. */
+
+      if (got == -1)
+	{
+	  error (0, errno, "%s", filename);
+	  return 1;
+	}
+      if (got == 0)
+	break;
+
+      /* Advance through the buffer until it ends or until the last
+	 wanted newline has been passed. */
+      while (used < got)
+	{
+	  char ch = buffer[used];
+
+	  used++;
+	  if (ch == '\n')
+	    {
+	      lines_to_write--;
+	      if (lines_to_write == 0)
+		break;
+	    }
+	}
+      xwrite (1, buffer, used);
+    }
+  return 0;
+}
+
+/* If STR ends in one of the unit letters `b', `k' or `m', set
+   `unit_size' to 512, 1024 or 1048576 respectively and remove the
+   letter from STR.  Otherwise leave both unchanged. */
+
+void
+parse_unit (str)
+     char *str;
+{
+  int arglen = strlen (str);
+  int multiplier;
+
+  if (arglen == 0)
+    return;
+
+  switch (str[arglen - 1])
+    {
+    case 'b':
+      multiplier = 512;
+      break;
+    case 'k':
+      multiplier = 1024;
+      break;
+    case 'm':
+      multiplier = 1048576;
+      break;
+    default:
+      /* No unit suffix. */
+      return;
+    }
+
+  unit_size = multiplier;
+  str[arglen - 1] = '\0';
+}
+
+/* Convert STR, a string of ASCII digits, into a nonnegative number.
+   Return -1 if STR contains anything other than digits; an empty
+   STR yields 0.
+   The value is accumulated in a `long', the same type as the return
+   value, so that it is not truncated to `int' on the way.
+   No overflow check is made. */
+
+long
+atou (str)
+     char *str;
+{
+  long value;
+
+  for (value = 0; ISDIGIT (*str); ++str)
+    value = value * 10 + *str - '0';
+  return *str ? -1 : value;
+}
+
+/* Print both accepted command forms on standard error,
+   then terminate with exit status 1. */
+
+void
+usage ()
+{
+  char *synopsis = "\
+Usage: %s [-c N[bkm]] [-n N] [-qv] [--bytes=N[bkm]] [--lines=N]\n\
+ [--quiet] [--silent] [--verbose] [file...]\n\
+ %s [-Nbcklmqv] [file...]\n";
+
+  fprintf (stderr, synopsis, program_name, program_name);
+  exit (1);
+}
diff --git a/src/join.c b/src/join.c
new file mode 100644
index 000000000..9ac82e0fd
--- /dev/null
+++ b/src/join.c
@@ -0,0 +1,690 @@
+/* join - join lines of two files on a common field
+ Copyright (C) 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Written by Mike Haertel, mike@gnu.ai.mit.edu. */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <stdio.h>
+#include <sys/types.h>
+#include <getopt.h>
+#include "system.h"
+
+#ifdef isascii
+#define ISSPACE(c) (isascii(c) && isspace(c))
+#define ISDIGIT(c) (isascii(c) && isdigit(c))
+#else
+#define ISSPACE(c) isspace(c)
+#define ISDIGIT(c) isdigit(c)
+#endif
+
+char *xmalloc ();
+char *xrealloc ();
+void error ();
+static void usage ();
+
+#define min(A, B) ((A) < (B) ? (A) : (B))
+
+/* An element of the list describing the format of each
+ output line. Elements are created by add_field, which appends
+ them so the list keeps the order in which specs were given. */
+struct outlist
+{
+ int file; /* File to take field from (1 or 2). */
+ int field; /* Field number to print, counting from 1 (prjoin subtracts 1 before indexing). */
+ struct outlist *next; /* Next spec, or NULL for the last one. */
+};
+
+/* A field of a line: the characters from `beg' up to but not
+ including `lim'. xfields only records pointers into the line's
+ text, so a field is not NUL-terminated. */
+struct field
+{
+ char *beg; /* First character in field. */
+ char *lim; /* Character after last character in field. */
+};
+
+/* A line read from an input file. Newlines are not stored.
+ get_line allocates the text and xfields allocates `fields';
+ freeline releases both. */
+struct line
+{
+ char *beg; /* First character in line. */
+ char *lim; /* Character after last character in line. */
+ int nfields; /* Number of elements in `fields'. */
+ struct field *fields; /* The fields found in the line, in order. */
+};
+
+/* One or more consecutive lines read from a file that all have the
+ same join field value. initseq allocates room for one line and
+ getseq doubles `alloc' whenever the array is full. */
+struct seq
+{
+ int count; /* Elements used in `lines'. */
+ int alloc; /* Elements allocated in `lines'. */
+ struct line *lines; /* The lines themselves; freed by delseq. */
+};
+
+/* If nonzero, print unpairable lines in file 1 or 2. */
+static int print_unpairables_1, print_unpairables_2;
+
+/* If nonzero, print pairable lines. */
+static int print_pairables;
+
+/* Empty output field filler. */
+static char *empty_filler;
+
+/* Field to join on in file 1 and in file 2, counting from 0
+ (main stores the field number given by the user minus 1). */
+static int join_field_1, join_field_2;
+
+/* List of fields to print. */
+struct outlist *outlist;
+
+/* Last element in `outlist', where a new element can be added. */
+struct outlist *outlist_end;
+
+/* Tab character separating fields; if this is NUL fields are separated
+ by any nonempty string of white space, otherwise by exactly one
+ tab character. */
+static char tab;
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Fill in the `fields' structure in LINE.
+   LINE->beg and LINE->lim must already delimit the text.  The field
+   array is newly allocated; LINE->nfields receives the number of
+   fields found.  If `tab' is nonzero each occurrence of it ends a
+   field; otherwise fields are separated by runs of white space, and
+   white space before the first field is ignored.  */
+
+static void
+xfields (line)
+     struct line *line;
+{
+  /* Size of the array to allocate.  It is doubled whenever a line
+     needs more and is kept for later calls.  */
+  static int nfields = 2;
+  int i;
+  register char *ptr, *lim;
+
+  line->fields = (struct field *) xmalloc (nfields * sizeof (struct field));
+
+  ptr = line->beg;
+  lim = line->lim;
+
+  /* With the default separator, blanks in front of the first word do
+     not form a field of their own; without this the first field of
+     an indented line would be empty.  */
+  if (!tab)
+    while (ptr < lim && ISSPACE (*ptr))
+      ++ptr;
+
+  for (i = 0; ptr < lim; ++i)
+    {
+      if (i == nfields)
+        {
+          nfields *= 2;
+          line->fields = (struct field *)
+            xrealloc ((char *) line->fields, nfields * sizeof (struct field));
+        }
+      if (tab)
+        {
+          line->fields[i].beg = ptr;
+          while (ptr < lim && *ptr != tab)
+            ++ptr;
+          line->fields[i].lim = ptr;
+          /* Step over exactly one separator.  */
+          if (ptr < lim)
+            ++ptr;
+        }
+      else
+        {
+          line->fields[i].beg = ptr;
+          while (ptr < lim && !ISSPACE (*ptr))
+            ++ptr;
+          line->fields[i].lim = ptr;
+          /* Step over the whole run of white space.  */
+          while (ptr < lim && ISSPACE (*ptr))
+            ++ptr;
+        }
+    }
+
+  line->nfields = i;
+}
+
+/* Read one line from FP, store its text (without the newline) in
+   newly allocated memory described by LINE, and split it into fields
+   with xfields.  Return 1 if a line was read, 0 if FP was already at
+   end of file or nothing but end of file was found.  */
+
+static int
+get_line (fp, line)
+     FILE *fp;
+     struct line *line;
+{
+  /* Buffer size to start with; it only grows and is kept between
+     calls.  */
+  static int linesize = 80;
+  char *buf;
+  int len = 0;
+  int ch;
+
+  if (feof (fp))
+    return 0;
+
+  buf = xmalloc (linesize);
+
+  for (;;)
+    {
+      ch = getc (fp);
+      if (ch == EOF || ch == '\n')
+        break;
+      if (len == linesize)
+        {
+          linesize *= 2;
+          buf = xrealloc (buf, linesize);
+        }
+      buf[len++] = ch;
+    }
+
+  /* End of file with no characters at all is not a line.  */
+  if (ch == EOF && len == 0)
+    {
+      free (buf);
+      return 0;
+    }
+
+  line->beg = buf;
+  line->lim = buf + len;
+  xfields (line);
+  return 1;
+}
+
+/* Release the text and the field array of LINE.  LINE itself is not
+   freed.  */
+static void
+freeline (line)
+     struct line *line;
+{
+  free (line->beg);
+  free ((char *) line->fields);
+}
+
+/* Make SEQ an empty sequence with room for one line.  */
+static void
+initseq (seq)
+     struct seq *seq;
+{
+  seq->alloc = 1;
+  seq->count = 0;
+  seq->lines = (struct line *) xmalloc (sizeof (struct line));
+}
+
+/* Read a line from FP and append it to SEQ, first doubling the
+   capacity of SEQ if it is full.  Return 1 if a line was added,
+   0 at end of file (SEQ->count is then unchanged).  */
+
+static int
+getseq (fp, seq)
+     FILE *fp;
+     struct seq *seq;
+{
+  struct line *slot;
+
+  if (seq->count == seq->alloc)
+    {
+      int grown = seq->alloc * 2;
+
+      seq->lines = (struct line *)
+        xrealloc ((char *) seq->lines, grown * sizeof (struct line));
+      seq->alloc = grown;
+    }
+
+  slot = seq->lines + seq->count;
+  if (!get_line (fp, slot))
+    return 0;
+
+  seq->count++;
+  return 1;
+}
+
+/* Release the line array of SEQ.  The lines stored in it are not
+   freed here.  */
+static void
+delseq (seq)
+     struct seq *seq;
+{
+  struct line *storage = seq->lines;
+
+  free ((char *) storage);
+}
+
+/* Compare the join field of LINE1 (field `join_field_1') with the
+   join field of LINE2 (field `join_field_2').  Return <0 if the
+   first compares less, >0 if it compares greater, 0 if equal.
+   A missing field counts as empty, and an empty field sorts before
+   any nonempty one.  */
+
+static int
+keycmp (line1, line2)
+     struct line *line1;
+     struct line *line2;
+{
+  struct field *key1 = NULL, *key2 = NULL;	/* Join fields, if present. */
+  int len1 = 0, len2 = 0;			/* Their lengths. */
+  int diff;
+
+  if (join_field_1 < line1->nfields)
+    {
+      key1 = &line1->fields[join_field_1];
+      len1 = key1->lim - key1->beg;
+    }
+  if (join_field_2 < line2->nfields)
+    {
+      key2 = &line2->fields[join_field_2];
+      len2 = key2->lim - key2->beg;
+    }
+
+  if (len1 == 0 || len2 == 0)
+    {
+      if (len1 != 0)
+        return 1;
+      return len2 != 0 ? -1 : 0;
+    }
+
+  /* Equal common prefixes are ordered by length.  */
+  diff = memcmp (key1->beg, key2->beg, min (len1, len2));
+  return diff ? diff : len1 - len2;
+}
+
+/* Write field N (counting from 0) of LINE to standard output.  If
+   LINE has no such field, or the field is empty, write
+   `empty_filler' instead when one has been set.  */
+
+static void
+prfield (n, line)
+     int n;
+     struct line *line;
+{
+  int len = 0;
+
+  if (n < line->nfields)
+    len = line->fields[n].lim - line->fields[n].beg;
+
+  if (len)
+    fwrite (line->fields[n].beg, 1, len, stdout);
+  else if (empty_filler)
+    fputs (empty_filler, stdout);
+}
+
+/* Print LINE, with its fields separated by `tab' (or by a space
+   when `tab' is NUL) and followed by a newline.  A line with no
+   fields at all, such as an empty input line, is printed as an empty
+   line rather than being dropped from the output.  */
+
+static void
+prline (line)
+     struct line *line;
+{
+  int i;
+
+  for (i = 0; i < line->nfields; ++i)
+    {
+      if (i > 0)
+        putchar (tab ? tab : ' ');
+      prfield (i, line);
+    }
+  /* Terminate the line even when the loop printed nothing.  */
+  putchar ('\n');
+}
+
+/* Print one output line for the pair LINE1 (from file 1) and LINE2
+   (from file 2).  If `outlist' is set, print exactly the fields it
+   names; otherwise print the join field of LINE1 followed by the
+   remaining fields of LINE1 and then the remaining fields of LINE2.
+   Fields are separated by `tab', or by a space when `tab' is NUL.  */
+
+static void
+prjoin (line1, line2)
+     struct line *line1;
+     struct line *line2;
+{
+  int sep = tab ? tab : ' ';
+  struct outlist *o;
+  struct line *side[2];
+  int skip[2];
+  int k, i;
+
+  if (outlist)
+    {
+      for (o = outlist; o; o = o->next)
+        {
+          if (o != outlist)
+            putchar (sep);
+          prfield (o->field - 1, o->file == 1 ? line1 : line2);
+        }
+      putchar ('\n');
+      return;
+    }
+
+  side[0] = line1;
+  skip[0] = join_field_1;
+  side[1] = line2;
+  skip[1] = join_field_2;
+
+  prfield (join_field_1, line1);
+  /* Every field of each line except its join field, in order.  */
+  for (k = 0; k < 2; ++k)
+    for (i = 0; i < side[k]->nfields; ++i)
+      if (i != skip[k])
+        {
+          putchar (sep);
+          prfield (i, side[k]);
+        }
+  putchar ('\n');
+}
+
+/* Print the join of the files in FP1 and FP2.
+ Each file is consumed one group at a time, a group being the
+ consecutive lines whose join field compares equal under keycmp.
+ Depending on `print_pairables', `print_unpairables_1' and
+ `print_unpairables_2', matching pairs and/or lines without a
+ partner are written to standard output. */
+
+static void
+join (fp1, fp2)
+ FILE *fp1;
+ FILE *fp2;
+{
+ struct seq seq1, seq2; /* Current group (plus lookahead line) of each file. */
+ struct line line; /* Scratch line for draining a file at the end. */
+ int diff, i, j, eof1, eof2;
+
+ /* Read the first line of each file. */
+ initseq (&seq1);
+ getseq (fp1, &seq1);
+ initseq (&seq2);
+ getseq (fp2, &seq2);
+
+ while (seq1.count && seq2.count)
+ {
+ diff = keycmp (&seq1.lines[0], &seq2.lines[0]);
+ if (diff < 0)
+ {
+ /* The line from file 1 has no partner: drop it and read the next. */
+ if (print_unpairables_1)
+ prline (&seq1.lines[0]);
+ freeline (&seq1.lines[0]);
+ seq1.count = 0;
+ getseq (fp1, &seq1);
+ continue;
+ }
+ if (diff > 0)
+ {
+ /* The line from file 2 has no partner: drop it and read the next. */
+ if (print_unpairables_2)
+ prline (&seq2.lines[0]);
+ freeline (&seq2.lines[0]);
+ seq2.count = 0;
+ getseq (fp2, &seq2);
+ continue;
+ }
+
+ /* Keep reading lines from file1 as long as they continue to
+ match the current line from file2. When the loop ends the
+ last counted slot is not part of the group: it holds either
+ the first non-matching line or, at end of file, nothing
+ (the count is bumped anyway so both cases look alike). */
+ eof1 = 0;
+ do
+ if (!getseq (fp1, &seq1))
+ {
+ eof1 = 1;
+ ++seq1.count;
+ break;
+ }
+ while (!keycmp (&seq1.lines[seq1.count - 1], &seq2.lines[0]));
+
+ /* Keep reading lines from file2 as long as they continue to
+ match the current line from file1. The same convention
+ for the last counted slot applies. */
+ eof2 = 0;
+ do
+ if (!getseq (fp2, &seq2))
+ {
+ eof2 = 1;
+ ++seq2.count;
+ break;
+ }
+ while (!keycmp (&seq1.lines[0], &seq2.lines[seq2.count - 1]));
+
+ if (print_pairables)
+ {
+ /* Every line of one group pairs with every line of the other. */
+ for (i = 0; i < seq1.count - 1; ++i)
+ for (j = 0; j < seq2.count - 1; ++j)
+ prjoin (&seq1.lines[i], &seq2.lines[j]);
+ }
+
+ /* Discard the finished group of file 1; the lookahead line, if
+ there is one, becomes the start of the next group. */
+ for (i = 0; i < seq1.count - 1; ++i)
+ freeline (&seq1.lines[i]);
+ if (!eof1)
+ {
+ seq1.lines[0] = seq1.lines[seq1.count - 1];
+ seq1.count = 1;
+ }
+ else
+ seq1.count = 0;
+
+ /* Likewise for file 2. */
+ for (i = 0; i < seq2.count - 1; ++i)
+ freeline (&seq2.lines[i]);
+ if (!eof2)
+ {
+ seq2.lines[0] = seq2.lines[seq2.count - 1];
+ seq2.count = 1;
+ }
+ else
+ seq2.count = 0;
+ }
+
+ /* One file is exhausted, so whatever is left of file 1 cannot
+ be paired. */
+ if (print_unpairables_1 && seq1.count)
+ {
+ prline (&seq1.lines[0]);
+ freeline (&seq1.lines[0]);
+ while (get_line (fp1, &line))
+ {
+ prline (&line);
+ freeline (&line);
+ }
+ }
+
+ /* Likewise for what is left of file 2. */
+ if (print_unpairables_2 && seq2.count)
+ {
+ prline (&seq2.lines[0]);
+ freeline (&seq2.lines[0]);
+ while (get_line (fp2, &line))
+ {
+ prline (&line);
+ freeline (&line);
+ }
+ }
+
+ delseq (&seq1);
+ delseq (&seq2);
+}
+
+/* Append to `outlist' a spec for field FIELD (counting from 1) of
+   file FILE (1 or 2) and return 1.  If FILE or FIELD is out of
+   range, add nothing and return 0.  */
+
+static int
+add_field (file, field)
+     int file;
+     int field;
+{
+  struct outlist *node;
+
+  if (field < 1)
+    return 0;
+  if (file != 1 && file != 2)
+    return 0;
+
+  node = (struct outlist *) xmalloc (sizeof (struct outlist));
+  node->next = NULL;
+  node->field = field;
+  node->file = file;
+
+  /* Link at the tail so output follows the order of the specs.  */
+  if (outlist != NULL)
+    outlist_end->next = node;
+  else
+    outlist = node;
+  outlist_end = node;
+
+  return 1;
+}
+
+/* Add the comma or blank separated field spec(s) in STR to `outlist'.
+   Each spec has the form FILE.FIELD.  Return the number of fields
+   added.  If STR contains any character that cannot occur in a field
+   list, return 0 without adding anything, so that a caller probing
+   whether STR is a field list does not end up with a partly
+   extended `outlist'.  */
+
+static int
+add_field_list (str)
+     char *str;
+{
+  int added = 0;
+  int file = -1, field = -1;	/* -1 means no digit seen yet. */
+  int dot_found = 0;
+  char *p;
+
+  /* Validate the whole string before touching `outlist'.  */
+  for (p = str; *p; p++)
+    if (!(*p == ',' || isblank (*p) || *p == '.' || ISDIGIT (*p)))
+      return 0;
+
+  for (; *str; str++)
+    {
+      if (*str == ',' || isblank (*str))
+        {
+          /* End of one spec; add_field rejects incomplete ones.  */
+          added += add_field (file, field);
+          file = field = -1;
+          dot_found = 0;
+        }
+      else if (*str == '.')
+        dot_found = 1;
+      else
+        {
+          /* A digit: it belongs to the file number before the dot
+             and to the field number after it.  */
+          if (!dot_found)
+            {
+              if (file == -1)
+                file = 0;
+              file = file * 10 + *str - '0';
+            }
+          else
+            {
+              if (field == -1)
+                field = 0;
+              field = field * 10 + *str - '0';
+            }
+        }
+    }
+
+  added += add_field (file, field);
+  return added;
+}
+
+/* When using getopt_long_only, no long option can start with
+ a character that is a short option. */
+static struct option longopts[] =
+{
+ {"j", 1, NULL, 'j'},
+ {"j1", 1, NULL, '1'},
+ {"j2", 1, NULL, '2'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Parse the command line, open the two input files and print their
+   join on standard output.  Non-option arguments are returned by
+   getopt as option code 1: one that follows `-o' and parses as a
+   field list extends that list; the others are the two file names,
+   where `-' stands for standard input.  */
+void
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  char *names[2];
+  FILE *fp1, *fp2;
+  int optc, prev_optc = 0, nfiles, val;
+
+  program_name = argv[0];
+  nfiles = 0;
+  print_pairables = 1;
+
+  while ((optc = getopt_long_only (argc, argv, "-a:e:1:2:o:t:v:", longopts,
+                                   (int *) 0)) != EOF)
+    {
+      switch (optc)
+        {
+        case 'a':
+          val = atoi (optarg);
+          if (val == 1)
+            print_unpairables_1 = 1;
+          else if (val == 2)
+            print_unpairables_2 = 1;
+          else
+            error (2, 0, "invalid file number for `-a'");
+          break;
+
+        case 'e':
+          empty_filler = optarg;
+          break;
+
+        case '1':
+          val = atoi (optarg);
+          if (val <= 0)
+            error (2, 0, "invalid field number for `-1'");
+          join_field_1 = val - 1;
+          break;
+
+        case '2':
+          val = atoi (optarg);
+          if (val <= 0)
+            error (2, 0, "invalid field number for `-2'");
+          join_field_2 = val - 1;
+          break;
+
+        case 'j':
+          val = atoi (optarg);
+          if (val <= 0)
+            error (2, 0, "invalid field number for `-j'");
+          join_field_1 = join_field_2 = val - 1;
+          break;
+
+        case 'o':
+          if (add_field_list (optarg) == 0)
+            error (2, 0, "invalid field list for `-o'");
+          break;
+
+        case 't':
+          tab = *optarg;
+          break;
+
+        case 'v':
+          val = atoi (optarg);
+          if (val == 1)
+            print_unpairables_1 = 1;
+          else if (val == 2)
+            print_unpairables_2 = 1;
+          else
+            error (2, 0, "invalid file number for `-v'");
+          /* Unlike `-a', `-v' suppresses the joined lines.  */
+          print_pairables = 0;
+          break;
+
+        case 1:		/* Non-option argument.  */
+          if (prev_optc == 'o')
+            {
+              /* Might be continuation of args to -o.  */
+              if (add_field_list (optarg) > 0)
+                continue;	/* Don't change `prev_optc'.  */
+            }
+
+          if (nfiles > 1)
+            usage ();
+          names[nfiles++] = optarg;
+          break;
+
+        case '?':
+          usage ();
+        }
+      prev_optc = optc;
+    }
+
+  if (nfiles != 2)
+    usage ();
+
+  fp1 = strcmp (names[0], "-") ? fopen (names[0], "r") : stdin;
+  if (!fp1)
+    error (1, errno, "%s", names[0]);
+  fp2 = strcmp (names[1], "-") ? fopen (names[1], "r") : stdin;
+  if (!fp2)
+    error (1, errno, "%s", names[1]);
+  /* This is a usage mistake, not a failed system call, so no errno
+     value applies; passing a stale one would tack an unrelated
+     system message onto the diagnostic.  */
+  if (fp1 == fp2)
+    error (1, 0, "both files cannot be standard input");
+  join (fp1, fp2);
+
+  if ((fp1 == stdin || fp2 == stdin) && fclose (stdin) == EOF)
+    error (1, errno, "-");
+  if (ferror (stdout) || fclose (stdout) == EOF)
+    error (1, 0, "write error");
+
+  exit (0);
+}
+
+/* Print a usage message for `program_name' on standard error and
+ exit with status 1. */
+static void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-a 1|2] [-v 1|2] [-e empty-string] [-o field-list...] [-t char]\n\
+ [-j[1|2] field] [-1 field] [-2 field] file1 file2\n",
+ program_name);
+ exit (1);
+}
diff --git a/src/nl.c b/src/nl.c
new file mode 100644
index 000000000..368001adc
--- /dev/null
+++ b/src/nl.c
@@ -0,0 +1,546 @@
+/* nl -- number lines of files
+ Copyright (C) 1989, 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Scott Bartram (nancy!scott@uunet.uu.net)
+ Revised by David MacKenzie (djm@ai.mit.edu) */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <getopt.h>
+#include <regex.h>
+#include "linebuffer.h"
+#include "system.h"
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+/* Line-number formats. */
+enum number_format
+{
+ FORMAT_RIGHT_NOLZ, /* Right justified, no leading zeroes. */
+ FORMAT_RIGHT_LZ, /* Right justified, leading zeroes. */
+ FORMAT_LEFT /* Left justified, no leading zeroes. */
+};
+
+/* Default section delimiter characters. */
+#define DEFAULT_SECTION_DELIMITERS "\\:"
+
+/* Types of input lines: either one of the section delimiters,
+ or text to output. */
+enum section
+{
+ Header, Body, Footer, Text
+};
+
+/* Format of body lines (-b). */
+char *body_type = "t";
+
+/* Format of header lines (-h). */
+char *header_type = "n";
+
+/* Format of footer lines (-f). */
+char *footer_type = "n";
+
+/* Format currently being used (body, header, or footer). */
+char *current_type;
+
+/* Regex for body lines to number (-bp). */
+struct re_pattern_buffer body_regex;
+
+/* Regex for header lines to number (-hp). */
+struct re_pattern_buffer header_regex;
+
+/* Regex for footer lines to number (-fp). */
+struct re_pattern_buffer footer_regex;
+
+/* Pointer to current regex, if any. */
+struct re_pattern_buffer *current_regex = NULL;
+
+/* Separator string to print after line number (-s). */
+char *separator_str = "\t";
+
+/* Input section delimiter string (-d). */
+char *section_del = DEFAULT_SECTION_DELIMITERS;
+
+/* Header delimiter string. */
+char *header_del = NULL;
+
+/* Header section delimiter length. */
+int header_del_len;
+
+/* Body delimiter string. */
+char *body_del = NULL;
+
+/* Body section delimiter length. */
+int body_del_len;
+
+/* Footer delimiter string. */
+char *footer_del = NULL;
+
+/* Footer section delimiter length. */
+int footer_del_len;
+
+/* Input buffer. */
+struct linebuffer line_buf;
+
+/* printf format string for line number. */
+char *print_fmt;
+
+/* printf format string for unnumbered lines. */
+char *print_no_line_fmt = NULL;
+
+/* Starting line number on each page (-v). */
+int page_start = 1;
+
+/* Line number increment (-i). */
+int page_incr = 1;
+
+/* If TRUE, reset line number at start of each page (-p). */
+int reset_numbers = TRUE;
+
+/* Number of blank lines to consider to be one line for numbering (-l). */
+int blank_join = 1;
+
+/* Width of line numbers (-w). */
+int lineno_width = 6;
+
+/* Line number format (-n). */
+enum number_format lineno_format = FORMAT_RIGHT_NOLZ;
+
+/* Current print line number. */
+int line_no;
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Nonzero if we have ever read standard input. */
+int have_read_stdin;
+
+enum section check_section ();
+char *xmalloc ();
+char *xrealloc ();
+int build_type_arg ();
+int nl_file ();
+void usage ();
+void process_file ();
+void proc_header ();
+void proc_body ();
+void proc_footer ();
+void proc_text ();
+void print_lineno ();
+void build_print_fmt ();
+void error ();
+
+struct option longopts[] =
+{
+ {"header-numbering", 1, NULL, 'h'},
+ {"body-numbering", 1, NULL, 'b'},
+ {"footer-numbering", 1, NULL, 'f'},
+ {"first-page", 1, NULL, 'v'},
+ {"page-increment", 1, NULL, 'i'},
+ {"no-renumber", 0, NULL, 'p'},
+ {"join-blank-lines", 1, NULL, 'l'},
+ {"number-separator", 1, NULL, 's'},
+ {"number-width", 1, NULL, 'w'},
+ {"number-format", 1, NULL, 'n'},
+ {"section-delimiter", 1, NULL, 'd'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Parse the options, set up the section delimiters and the output
+   formats, then number each file named on the command line (or
+   standard input if none is named).  Exits with a nonzero status if
+   any file could not be processed.  */
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c, exit_status = 0;
+  int num;			/* Numeric option argument. */
+  int del_len;			/* Length of `section_del'. */
+  int pad_len;			/* Length of the blank line-number column. */
+
+  program_name = argv[0];
+  have_read_stdin = 0;
+
+  while ((c = getopt_long (argc, argv, "h:b:f:v:i:pl:s:w:n:d:", longopts,
+                           (int *) 0)) != EOF)
+    {
+      switch (c)
+        {
+        case 'h':
+          if (build_type_arg (&header_type, &header_regex) != TRUE)
+            usage ();
+          break;
+        case 'b':
+          if (build_type_arg (&body_type, &body_regex) != TRUE)
+            usage ();
+          break;
+        case 'f':
+          if (build_type_arg (&footer_type, &footer_regex) != TRUE)
+            usage ();
+          break;
+        case 'v':
+          page_start = atoi (optarg);
+          break;
+        case 'i':
+          num = atoi (optarg);
+          page_incr = num < 1 ? 1 : num;
+          break;
+        case 'p':
+          reset_numbers = FALSE;
+          break;
+        case 'l':
+          blank_join = atoi (optarg);
+          break;
+        case 's':
+          separator_str = optarg;
+          break;
+        case 'w':
+          num = atoi (optarg);
+          lineno_width = num < 1 ? 1 : num;
+          break;
+        case 'n':
+          /* Only the first two characters of the argument count.  */
+          if (optarg[0] == 'l' && optarg[1] == 'n')
+            lineno_format = FORMAT_LEFT;
+          else if (optarg[0] == 'r' && optarg[1] == 'n')
+            lineno_format = FORMAT_RIGHT_NOLZ;
+          else if (optarg[0] == 'r' && optarg[1] == 'z')
+            lineno_format = FORMAT_RIGHT_LZ;
+          else
+            usage ();
+          break;
+        case 'd':
+          section_del = optarg;
+          break;
+        default:
+          usage ();
+          break;
+        }
+    }
+
+  /* Initialize the section delimiters: the footer delimiter is
+     `section_del' once, the body delimiter twice, the header
+     delimiter three times.  */
+  del_len = strlen (section_del);
+
+  footer_del_len = del_len;
+  footer_del = xmalloc (footer_del_len + 1);
+  strcpy (footer_del, section_del);
+
+  body_del_len = del_len * 2;
+  body_del = xmalloc (body_del_len + 1);
+  strcpy (body_del, section_del);
+  strcat (body_del, section_del);
+
+  header_del_len = del_len * 3;
+  header_del = xmalloc (header_del_len + 1);
+  strcpy (header_del, section_del);
+  strcat (header_del, section_del);
+  strcat (header_del, section_del);
+
+  /* Initialize the input buffer.  */
+  initbuffer (&line_buf);
+
+  /* Initialize the text printed in front of unnumbered lines: as
+     many spaces as a line number plus separator would occupy.  */
+  pad_len = strlen (separator_str);
+  pad_len += lineno_width;
+  print_no_line_fmt = xmalloc (pad_len + 1);
+  memset (print_no_line_fmt, ' ', pad_len);
+  print_no_line_fmt[pad_len] = '\0';
+
+  /* Input starts out in a body section.  */
+  line_no = page_start;
+  current_type = body_type;
+  current_regex = &body_regex;
+  build_print_fmt ();
+
+  /* Main processing.  */
+
+  if (optind == argc)
+    exit_status |= nl_file ("-");
+  while (optind < argc)
+    {
+      exit_status |= nl_file (argv[optind]);
+      optind++;
+    }
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    {
+      error (0, errno, "-");
+      exit_status = 1;
+    }
+  if (ferror (stdout) || fclose (stdout) == EOF)
+    error (1, 0, "write error");
+
+  exit (exit_status);
+}
+
+/* Process file FILE to standard output; `-' means standard input.
+   Return 0 if successful, 1 if not.  A stream opened here is closed
+   again on every path; standard input is left open, only its error
+   and end-of-file indicators are cleared after a successful read.  */
+
+int
+nl_file (file)
+     char *file;
+{
+  FILE *stream;
+  int is_stdin = !strcmp (file, "-");
+
+  if (is_stdin)
+    {
+      have_read_stdin = 1;
+      stream = stdin;
+    }
+  else
+    {
+      stream = fopen (file, "r");
+      if (stream == NULL)
+        {
+          error (0, errno, "%s", file);
+          return 1;
+        }
+    }
+
+  process_file (stream);
+
+  if (ferror (stream))
+    {
+      error (0, errno, "%s", file);
+      /* The read error has been reported; still give the stream
+         back so a failing file does not use up a descriptor.  */
+      if (!is_stdin)
+        fclose (stream);
+      return 1;
+    }
+  if (is_stdin)
+    clearerr (stream);		/* Also clear EOF. */
+  else if (fclose (stream) == EOF)
+    {
+      error (0, errno, "%s", file);
+      return 1;
+    }
+  return 0;
+}
+
+/* Read FP line by line into `line_buf' and hand each line to the
+   routine for its kind: a section delimiter switches sections, any
+   other line is printed as text.  */
+
+void
+process_file (fp)
+     FILE *fp;
+{
+  enum section kind;
+
+  while (readline (&line_buf, fp))
+    {
+      kind = check_section ();
+      if (kind == Header)
+        proc_header ();
+      else if (kind == Body)
+        proc_body ();
+      else if (kind == Footer)
+        proc_footer ();
+      else if (kind == Text)
+        proc_text ();
+    }
+}
+
+/* Return the type of line in `line_buf': Header, Body or Footer if
+   the line consists of exactly the corresponding delimiter string,
+   Text otherwise.  */
+
+enum section
+check_section ()
+{
+  /* Quick rejection of ordinary lines by looking at their first two
+     characters.  That test reads two bytes of `section_del', so it
+     is only valid when the delimiter (whose length main stores in
+     `footer_del_len') has at least two characters; a shorter one
+     could never have matched a line anyway.  */
+  if (line_buf.length < 2 || footer_del_len < 2
+      || memcmp (line_buf.buffer, section_del, 2))
+    return Text;
+  if (line_buf.length == header_del_len
+      && !memcmp (line_buf.buffer, header_del, header_del_len))
+    return Header;
+  if (line_buf.length == body_del_len
+      && !memcmp (line_buf.buffer, body_del, body_del_len))
+    return Body;
+  if (line_buf.length == footer_del_len
+      && !memcmp (line_buf.buffer, footer_del, footer_del_len))
+    return Footer;
+  return Text;
+}
+
+/* Handle a header delimiter line: print an empty line in its place,
+   make the header numbering style current and, unless `-p' was
+   given, restart numbering at `page_start'.  */
+
+void
+proc_header ()
+{
+  putchar ('\n');
+  if (reset_numbers)
+    line_no = page_start;
+  current_regex = &header_regex;
+  current_type = header_type;
+}
+
+/* Handle a body delimiter line: print an empty line in its place
+   and make the body numbering style current.  */
+
+void
+proc_body ()
+{
+  putchar ('\n');
+  current_regex = &body_regex;
+  current_type = body_type;
+}
+
+/* Handle a footer delimiter line: print an empty line in its place
+   and make the footer numbering style current.  */
+
+void
+proc_footer ()
+{
+  putchar ('\n');
+  current_regex = &footer_regex;
+  current_type = footer_type;
+}
+
+/* Process a regular text line in `line_buf': print either a line
+   number or the blank filler `print_no_line_fmt', as decided by the
+   current numbering style, then the line itself and a newline.
+   Styles: `a' numbers all lines (with `-l N', N > 1, only every Nth
+   of a run of empty lines), `t' numbers nonempty lines, `n' numbers
+   none, `p' numbers lines matching the current regex.  */
+
+void
+proc_text ()
+{
+  static int blank_lines = 0;	/* Consecutive blank lines so far. */
+
+  /* The filler is data, not a format, so it is written with fputs
+     rather than handed to printf as a format string.  */
+  switch (*current_type)
+    {
+    case 'a':
+      if (blank_join > 1)
+        {
+          if (line_buf.length || ++blank_lines == blank_join)
+            {
+              print_lineno ();
+              blank_lines = 0;
+            }
+          else
+            fputs (print_no_line_fmt, stdout);
+        }
+      else
+        print_lineno ();
+      break;
+    case 't':
+      if (line_buf.length)
+        print_lineno ();
+      else
+        fputs (print_no_line_fmt, stdout);
+      break;
+    case 'n':
+      fputs (print_no_line_fmt, stdout);
+      break;
+    case 'p':
+      /* A negative result from re_search means no match.  */
+      if (re_search (current_regex, line_buf.buffer, line_buf.length,
+                     0, line_buf.length, (struct re_registers *) 0) < 0)
+        fputs (print_no_line_fmt, stdout);
+      else
+        print_lineno ();
+      break;
+    }
+  fwrite (line_buf.buffer, sizeof (char), line_buf.length, stdout);
+  putchar ('\n');
+}
+
+/* Print the current line number using `print_fmt' and advance
+   `line_no' by `page_incr' for the next numbered line.  */
+
+void
+print_lineno ()
+{
+  int current = line_no;
+
+  line_no = current + page_incr;
+  printf (print_fmt, current);
+}
+
+/* Build the printf format string for numbered lines in newly
+   allocated memory and store it in `print_fmt'.  The format consists
+   of one integer conversion, chosen by `lineno_format' and
+   `lineno_width', followed by `separator_str'.  Any `%' in the
+   separator is doubled so that printf prints it literally instead of
+   taking it as a conversion.  */
+
+void
+build_print_fmt ()
+{
+  char *src, *dst;
+
+  /* Room for: '%', one flag character, the decimal digits of
+     lineno_width (never more than 3 per byte of an int), 'd', the
+     separator with every character possibly doubled, and '\0'.  */
+  print_fmt = xmalloc (2 * strlen (separator_str) + 3 * sizeof (int) + 4);
+  print_fmt[0] = '\0';
+  switch (lineno_format)
+    {
+    case FORMAT_RIGHT_NOLZ:
+      sprintf (print_fmt, "%%%dd", lineno_width);
+      break;
+    case FORMAT_RIGHT_LZ:
+      sprintf (print_fmt, "%%0%dd", lineno_width);
+      break;
+    case FORMAT_LEFT:
+      sprintf (print_fmt, "%%-%dd", lineno_width);
+      break;
+    }
+
+  /* Append the separator, escaping percent signs.  */
+  dst = print_fmt + strlen (print_fmt);
+  for (src = separator_str; *src; ++src)
+    {
+      if (*src == '%')
+        *dst++ = '%';
+      *dst++ = *src;
+    }
+  *dst = '\0';
+}
+
+/* Interpret `optarg' as a numbering style.  For `a', `t' and `n'
+   store `optarg' in *TYPEP.  For `p' store it likewise, advance
+   `optarg' past the letter and compile the rest of it as a regular
+   expression into REGEXP; a pattern that does not compile is
+   reported through error with status 1.  Return TRUE if the style
+   letter was recognized, FALSE otherwise.  */
+
+int
+build_type_arg (typep, regexp)
+     char **typep;
+     struct re_pattern_buffer *regexp;
+{
+  char *errmsg;
+  int optlen;
+  char kind = *optarg;
+
+  if (kind == 'a' || kind == 't' || kind == 'n')
+    {
+      *typep = optarg;
+      return TRUE;
+    }
+  if (kind != 'p')
+    return FALSE;
+
+  *typep = optarg;
+  ++optarg;			/* The pattern follows the `p'. */
+  optlen = strlen (optarg);
+
+  /* Prepare the pattern buffer before compiling into it.  */
+  regexp->allocated = optlen * 2;
+  regexp->buffer = (unsigned char *) xmalloc (regexp->allocated);
+  regexp->translate = NULL;
+  regexp->fastmap = xmalloc (256);
+  regexp->fastmap_accurate = 0;
+
+  errmsg = re_compile_pattern (optarg, optlen, regexp);
+  if (errmsg)
+    error (1, 0, "%s", errmsg);
+  return TRUE;
+}
+
+/* Print a usage message for `program_name' on standard error and
+ quit with exit status 2. */
+
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-h header-style] [-b body-style] [-f footer-style] [-p] [-d cc]\n\
+ [-v start-number] [-i increment] [-l lines] [-s line-separator]\n\
+ [-w line-no-width] [-n {ln,rn,rz}] [--header-numbering=style]\n\
+ [--body-numbering=style] [--footer-numbering=style]\n\
+ [--first-page=number] [--page-increment=number] [--no-renumber]\n\
+ [--join-blank-lines=number] [--number-separator=string]\n\
+ [--number-width=number] [--number-format={ln,rn,rz}]\n\
+ [--section-delimiter=cc] [file...]\n",
+ program_name);
+ exit (2);
+}
diff --git a/src/od.c b/src/od.c
new file mode 100644
index 000000000..f13c6b7bc
--- /dev/null
+++ b/src/od.c
@@ -0,0 +1,1697 @@
+/* od -- dump in octal (and other formats) the contents of files
+ Copyright (C) 1992 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Jim Meyering. */
+
+/* AIX requires this to be the first thing in the file. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not HAVE_ALLOCA_H */
+#ifdef _AIX
+ #pragma alloca
+#else /* not _AIX */
+char *alloca ();
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#include <stdio.h>
+#include <ctype.h>
+#include <assert.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+#if defined(__GNUC__) || defined(STDC_HEADERS)
+#include <float.h>
+#endif
+
+#ifdef __GNUC__
+typedef long double LONG_DOUBLE;
+#else
+typedef double LONG_DOUBLE;
+#endif
+
+#if HAVE_LIMITS_H
+#include <limits.h>
+#endif
+#ifndef SCHAR_MAX
+#define SCHAR_MAX 127
+#endif
+#ifndef SHRT_MAX
+#define SHRT_MAX 32767
+#endif
+#ifndef ULONG_MAX
+#define ULONG_MAX ((unsigned long) ~(unsigned long) 0)
+#endif
+
+#define STREQ(a,b) (strcmp((a), (b)) == 0)
+
+#ifndef MAX
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+#ifndef MIN
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+#endif
+
+/* The default number of input bytes per output line. */
+#define DEFAULT_BYTES_PER_BLOCK 16
+
+/* The number of decimal digits of precision in a float. */
+#ifndef FLT_DIG
+#define FLT_DIG 7
+#endif
+
+/* The number of decimal digits of precision in a double. */
+#ifndef DBL_DIG
+#define DBL_DIG 15
+#endif
+
+/* The number of decimal digits of precision in a long double. */
+#ifndef LDBL_DIG
+#define LDBL_DIG DBL_DIG
+#endif
+
+char *xmalloc ();
+char *xrealloc ();
+void error ();
+
+enum size_spec /* Size class of one output item.  The enumerators are
+   used, in this order, as indices into `width_bytes'.  */
+{
+ NO_SIZE, /* No type has this size; `width_bytes' holds -1 for it.  */
+ CHAR, /* sizeof (char) */
+ SHORT, /* sizeof (short int) */
+ INT, /* sizeof (int) */
+ LONG, /* sizeof (long int) */
+ FP_SINGLE, /* sizeof (float) */
+ FP_DOUBLE, /* sizeof (double) */
+ FP_LONG_DOUBLE /* sizeof (LONG_DOUBLE) */
+};
+
+enum output_format /* Kind of conversion requested by a format spec; the
+   letters below are the ones decode_one_format accepts.  */
+{
+ SIGNED_DECIMAL, /* `d' */
+ UNSIGNED_DECIMAL, /* `u' */
+ OCTAL, /* `o' */
+ HEXADECIMAL, /* `x' */
+ FLOATING_POINT, /* `f' */
+ NAMED_CHARACTER, /* `a' */
+ CHARACTER /* `c' */
+};
+
+enum strtoul_error /* Result codes of my_strtoul: success, no number
+   found, unexpected character after the number, and value too large
+   for an unsigned long, in that order.  */
+{
+ UINT_OK, UINT_INVALID, UINT_INVALID_SUFFIX_CHAR, UINT_OVERFLOW
+};
+typedef enum strtoul_error strtoul_error;
+
+/* Each output format specification (from POSIX `-t spec' or from
+ old-style options) is represented by one of these structures.
+ decode_one_format fills one in; write_block calls PRINT_FUNCTION
+ once per spec for every block it prints.  */
+struct tspec
+{
+ enum output_format fmt; /* Conversion kind.  */
+ enum size_spec size; /* Size class of each item.  */
+ void (*print_function) (); /* Called as (n_bytes, block, fmt_string).  */
+ char *fmt_string; /* printf format for one item followed by one
+   separator char; NULL for the `a' and `c' formats.  */
+};
+
+/* Convert the number of 8-bit bytes of a binary representation to
+ the number of characters (digits + sign if the type is signed)
+ required to represent the same quantity in the specified base/type.
+ For example, a 32-bit (4-byte) quantity may require a field width
+ as wide as the following for these types:
+ 11 unsigned octal
+ 11 signed decimal
+ 10 unsigned decimal
+ 8 unsigned hexadecimal
+ Each table below is indexed by the byte count and has entries for
+ 0 through 16 bytes.  decode_one_format uses the looked-up value as
+ the zero-padded field width of the printf format it builds.  */
+
+static const unsigned int bytes_to_oct_digits[] =
+{0, 3, 6, 8, 11, 14, 16, 19, 22, 25, 27, 30, 32, 35, 38, 41, 43};
+
+static const unsigned int bytes_to_signed_dec_digits[] =
+{1, 4, 6, 8, 11, 13, 16, 18, 20, 23, 25, 28, 30, 33, 35, 37, 40};
+
+static const unsigned int bytes_to_unsigned_dec_digits[] =
+{0, 3, 5, 8, 10, 13, 15, 17, 20, 22, 25, 27, 29, 32, 34, 37, 39};
+
+static const unsigned int bytes_to_hex_digits[] =
+{0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32};
+
+/* Convert enum size_spec to the size of the named type.
+ The entries must stay in the same order as the enumerators of
+ enum size_spec; get_lcm indexes this table with a spec's size.  */
+static const int width_bytes[] =
+{
+ -1, /* NO_SIZE */
+ sizeof (char), /* CHAR */
+ sizeof (short int), /* SHORT */
+ sizeof (int), /* INT */
+ sizeof (long int), /* LONG */
+ sizeof (float), /* FP_SINGLE */
+ sizeof (double), /* FP_DOUBLE */
+ sizeof (LONG_DOUBLE) /* FP_LONG_DOUBLE */
+};
+
+/* Names for some non-printing characters.
+ Indexed by character code 0 through 32 (octal 040); print_named_ascii
+ looks names up here after masking the byte to 7 bits.  Code 127 is
+ not in the table: print_named_ascii handles it separately as "del".  */
+static const char *const charname[33] =
+{
+ "nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
+ "bs", "ht", "nl", "vt", "ff", "cr", "so", "si",
+ "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
+ "can", "em", "sub", "esc", "fs", "gs", "rs", "us",
+ "sp"
+};
+
+/* A printf control string for printing a file offset. */
+static const char *output_address_fmt_string;
+
+/* The widest address format_address can produce is an unsigned long
+   printed in octal: one digit per three bits, rounded up (this assumes
+   8-bit bytes).  Never go below 13, the fixed size used before, so that
+   code relying on that much room keeps working.  */
+#define ULONG_OCTAL_DIGITS ((sizeof (unsigned long int) * 8 + 2) / 3)
+#define MAX_ADDRESS_LENGTH \
+  (ULONG_OCTAL_DIGITS > 13 ? ULONG_OCTAL_DIGITS : 13)
+/* Scratch buffer that format_address prints into and returns.  */
+static char address_fmt_buffer[MAX_ADDRESS_LENGTH + 1];
+/* Printed by write_block in place of the address on the second and
+   later format lines of a block.  */
+static char address_pad[MAX_ADDRESS_LENGTH + 1];
+
+static unsigned long int string_min; /* Minimum run of printable chars
+   that dump_strings treats as a string.  */
+static unsigned long int flag_dump_strings; /* NOTE(review): presumably
+   non-zero selects dump_strings over dump; it is set and tested
+   outside the code visible here -- confirm in main.  */
+
+/* The number of input bytes to skip before formatting and writing. */
+static unsigned long int n_bytes_to_skip = 0;
+
+/* When non-zero, MAX_BYTES_TO_FORMAT is the maximum number of bytes
+ to be read and formatted. Otherwise all input is formatted. */
+static int limit_bytes_to_format = 0;
+
+/* The maximum number of bytes that will be formatted. This
+ value is used only when LIMIT_BYTES_TO_FORMAT is non-zero. */
+static unsigned long int max_bytes_to_format;
+
+/* When non-zero and two or more consecutive blocks are equal, format
+ only the first block and output an asterisk alone on the following
+ line to indicate that identical blocks have been elided. */
+static int abbreviate_duplicate_blocks = 1;
+
+/* An array of specs describing how to format each input block. */
+static struct tspec *spec;
+
+/* The number of format specs. */
+static unsigned int n_specs;
+
+/* The allocated length of SPEC. */
+static unsigned int n_specs_allocated;
+
+/* The number of input bytes formatted per output line. It must be
+ a multiple of the least common multiple of the sizes associated with
+ the specified output types. It should be as large as possible, but
+ no larger than 16 -- unless specified with the -w option. */
+static unsigned int bytes_per_block;
+
+/* Human-readable representation of *file_list (for error messages).
+ It differs from *file_list only when *file_list is "-". */
+static char const *input_filename;
+
+/* A NULL-terminated list of the file-arguments from the command line.
+ If no file-arguments were specified, this variable is initialized
+ to { "-", NULL }. */
+static char const *const *file_list;
+
+/* The input stream associated with the current file. */
+static FILE *in_stream;
+
+#define LONGEST_INTEGRAL_TYPE long int
+
+#define MAX_INTEGRAL_TYPE_SIZE sizeof(LONGEST_INTEGRAL_TYPE)
+static enum size_spec integral_type_size[MAX_INTEGRAL_TYPE_SIZE + 1]; /* Maps
+   a size in bytes to the integral size class of that width, or NO_SIZE
+   when no integral type has it.  Filled in at the start of main.  */
+
+#define MAX_FP_TYPE_SIZE sizeof(LONG_DOUBLE)
+static enum size_spec fp_type_size[MAX_FP_TYPE_SIZE + 1]; /* Same mapping
+   for the floating point types.  */
+
+static struct option long_options[] =
+{
+ /* POSIX options.  In each entry the second field is 1 when the usage
+ message shows a mandatory `=value', 2 when the value is optional and
+ 0 when there is none; the last field is the equivalent short option
+ letter.  */
+ {"skip-bytes", 1, NULL, 'j'},
+ {"address-radix", 1, NULL, 'A'},
+ {"read-bytes", 1, NULL, 'N'},
+ {"format", 1, NULL, 't'},
+ {"output-duplicates", 0, NULL, 'v'},
+
+ /* non-POSIX options. */
+ {"strings", 2, NULL, 's'},
+ {"width", 2, NULL, 'w'},
+ {NULL, 0, NULL, 0} /* End-of-table marker.  */
+};
+
+/* The name this program was run with. */
+char *program_name;
+
+static void
+usage ()
+{ /* Write the option summary to stderr, with `program_name' filling
+   in the %s, then exit with status 1.  */
+ fprintf (stderr, "\
+Usage: %s [-abcdfhiloxv] [-s[bytes]] [-w[bytes]] [-A radix] [-j bytes]\n\
+ [-N bytes] [-t type] [--skip-bytes=bytes] [--address-radix=radix]\n\
+ [--read-bytes=bytes] [--format=type] [--output-duplicates]\n\
+ [--strings[=bytes]] [--width[=bytes]] [file...]\n",
+ program_name);
+ exit (1);
+}
+
+/* Return the greatest common divisor of U and V, found with Euclid's
+   algorithm.  When V is zero the result is U, so gcd (0, 0) is 0.  */
+
+static unsigned int
+gcd (u, v)
+     unsigned int u;
+     unsigned int v;
+{
+  for (;;)
+    {
+      unsigned int rem;
+
+      if (v == 0)
+        return u;
+
+      /* Replace the pair (U, V) by (V, U mod V).  */
+      rem = u % v;
+      u = v;
+      v = rem;
+    }
+}
+
+/* Compute the least common multiple of U and V.  Return 0 when both
+   U and V are zero.  */
+
+static unsigned int
+lcm (u, v)
+     unsigned int u;
+     unsigned int v;
+{
+  unsigned int t = gcd (u, v);
+  if (t == 0)
+    return 0;
+  /* Divide before multiplying: U / T is exact, whereas the product
+     U * V can wrap around even when the final result fits.  */
+  return (u / t) * v;
+}
+
+/* Convert the string S to an unsigned long using strtoul with base
+   BASE (which must lie between 0 and 36) and store the result in *VAL.
+   When ALLOW_BKM_SUFFIX is non-zero the number may be followed by a
+   multiplier suffix: `b' (512), `k' (1024) or `m' (1024 * 1024).
+   Return UINT_OK on success.  Otherwise leave *VAL untouched and return
+   UINT_INVALID (no number found), UINT_INVALID_SUFFIX_CHAR (unexpected
+   character after the number) or UINT_OVERFLOW (the value, possibly
+   after scaling, does not fit in an unsigned long).  */
+
+static strtoul_error
+my_strtoul (s, base, val, allow_bkm_suffix)
+     const char *s;
+     int base;
+     long unsigned int *val;
+     int allow_bkm_suffix;
+{
+  char *p;
+  unsigned long int tmp;
+
+  assert (0 <= base && base <= 36);
+
+  /* strtoul reports overflow only through errno and never clears it,
+     so it has to be zero going in; otherwise a stale value from some
+     earlier call would be mistaken for an overflow.  */
+  errno = 0;
+  tmp = strtoul (s, &p, base);
+  if (errno != 0)
+    return UINT_OVERFLOW;
+  if (p == s)
+    return UINT_INVALID;
+  if (!allow_bkm_suffix)
+    {
+      if (*p == '\0')
+        {
+          *val = tmp;
+          return UINT_OK;
+        }
+      else
+        return UINT_INVALID_SUFFIX_CHAR;
+    }
+
+  /* Multiply X by SCALE_FACTOR, returning UINT_OVERFLOW from the
+     enclosing function if the product would exceed ULONG_MAX.  The
+     arguments are parenthesized so that an expression such as
+     1024 * 1024 is scaled as a whole, and the test uses exact integer
+     division rather than floating point.  */
+#define BKM_SCALE(x, scale_factor)                      \
+  do                                                    \
+    {                                                   \
+      if ((x) > ULONG_MAX / (scale_factor))             \
+        return UINT_OVERFLOW;                           \
+      (x) *= (scale_factor);                            \
+    }                                                   \
+  while (0)
+
+  switch (*p)
+    {
+    case '\0':
+      break;
+
+    case 'b':
+      BKM_SCALE (tmp, 512);
+      break;
+
+    case 'k':
+      BKM_SCALE (tmp, 1024);
+      break;
+
+    case 'm':
+      BKM_SCALE (tmp, 1024 * 1024);
+      break;
+
+    default:
+      return UINT_INVALID_SUFFIX_CHAR;
+      break;
+    }
+
+  *val = tmp;
+  return UINT_OK;
+}
+
+static void
+uint_fatal_error (str, argument_type_string, err)
+ const char *str;
+ const char *argument_type_string;
+ strtoul_error err;
+{
+ switch (err)
+ {
+ case UINT_OK:
+ abort ();
+
+ case UINT_INVALID:
+ error (2, 0, "invalid %s `%s'", argument_type_string, str);
+ break;
+
+ case UINT_INVALID_SUFFIX_CHAR:
+ error (2, 0, "invalid character following %s `%s'",
+ argument_type_string, str);
+ break;
+
+ case UINT_OVERFLOW:
+ error (2, 0, "%s `%s' larger than maximum unsigned long",
+ argument_type_string, str);
+ break;
+ }
+}
+
+/* Print the N_BYTES bytes at BLOCK as signed characters, calling printf
+   with FMT_STRING once per byte.  FMT_STRING receives the value as an
+   int followed by a separator character: a space after every item but
+   the last, which gets a newline.  A failed write is reported through
+   error () with status 2.  */
+
+static void
+print_s_char (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  int i, err;
+  err = 0;
+  for (i = n_bytes; i > 0; i--)
+    {
+      int tmp = (unsigned) *(unsigned char *) block;
+      /* Map the upper half of the unsigned range onto the negative
+         values of a two's complement signed char: 255 becomes -1 and
+         SCHAR_MAX + 1 becomes -SCHAR_MAX - 1.  */
+      if (tmp > SCHAR_MAX)
+        tmp -= 2 * (SCHAR_MAX + 1);
+      assert (tmp <= SCHAR_MAX);
+      err |= (printf (fmt_string, tmp, (i == 1 ? '\n' : ' ')) == EOF);
+      block += sizeof (unsigned char);
+    }
+  if (err)
+    error (2, errno, "standard output");
+}
+
+/* Print the N_BYTES bytes at BLOCK as unsigned characters, calling
+   printf with FMT_STRING once per byte.  FMT_STRING receives the value
+   followed by a separator character: a space, or a newline after the
+   last item.  A failed write is reported through error () with
+   status 2.  */
+
+static void
+print_char (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const unsigned char *item = (const unsigned char *) block;
+  int left = n_bytes;
+  int failed = 0;
+
+  for (; left > 0; left--, item++)
+    {
+      unsigned int value = *item;
+
+      if (printf (fmt_string, value, (left == 1 ? '\n' : ' ')) == EOF)
+        failed = 1;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+/* Print the N_BYTES bytes at BLOCK as signed shorts, calling printf
+   with FMT_STRING once per item.  FMT_STRING receives the value as an
+   int followed by a separator character: a space after every item but
+   the last, which gets a newline.  A failed write is reported through
+   error () with status 2.  */
+
+static void
+print_s_short (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  int i, err;
+  err = 0;
+  for (i = n_bytes / sizeof (unsigned short); i > 0; i--)
+    {
+      int tmp = (unsigned) *(unsigned short *) block;
+      /* Map the upper half of the unsigned range onto the negative
+         values of a two's complement short: the all-ones pattern
+         becomes -1 and SHRT_MAX + 1 becomes -SHRT_MAX - 1.  Subtracting
+         in steps never forms a value larger than SHRT_MAX.  */
+      if (tmp > SHRT_MAX)
+        tmp = tmp - SHRT_MAX - SHRT_MAX - 2;
+      assert (tmp <= SHRT_MAX);
+      err |= (printf (fmt_string, tmp, (i == 1 ? '\n' : ' ')) == EOF);
+      block += sizeof (unsigned short);
+    }
+  if (err)
+    error (2, errno, "standard output");
+}
+
+/* Print the N_BYTES bytes at BLOCK as unsigned shorts, calling printf
+   with FMT_STRING once per item.  FMT_STRING receives the value
+   followed by a separator character: a space, or a newline after the
+   last item.  A failed write is reported through error () with
+   status 2.  */
+
+static void
+print_short (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const unsigned short *item = (const unsigned short *) block;
+  int left = n_bytes / sizeof (unsigned short);
+  int failed = 0;
+
+  for (; left > 0; left--, item++)
+    {
+      unsigned int value = *item;
+
+      if (printf (fmt_string, value, (left == 1 ? '\n' : ' ')) == EOF)
+        failed = 1;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+/* Print the N_BYTES bytes at BLOCK as ints, calling printf with
+   FMT_STRING once per item.  Each item is fetched as an unsigned int
+   and passed to FMT_STRING followed by a separator character: a space,
+   or a newline after the last item.  A failed write is reported
+   through error () with status 2.  */
+
+static void
+print_int (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const unsigned int *item = (const unsigned int *) block;
+  int left = n_bytes / sizeof (unsigned int);
+  int failed = 0;
+
+  while (left > 0)
+    {
+      unsigned int value = *item++;
+
+      if (printf (fmt_string, value, (left == 1 ? '\n' : ' ')) == EOF)
+        failed = 1;
+      left--;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+/* Print the N_BYTES bytes at BLOCK as longs, calling printf with
+   FMT_STRING once per item.  Each item is fetched as an unsigned long
+   and passed to FMT_STRING followed by a separator character: a space,
+   or a newline after the last item.  A failed write is reported
+   through error () with status 2.  */
+
+static void
+print_long (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const unsigned long *item = (const unsigned long *) block;
+  int left = n_bytes / sizeof (unsigned long);
+  int failed = 0;
+
+  while (left > 0)
+    {
+      unsigned long value = *item++;
+
+      if (printf (fmt_string, value, (left == 1 ? '\n' : ' ')) == EOF)
+        failed = 1;
+      left--;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+/* Print the N_BYTES bytes at BLOCK as floats, calling printf with
+   FMT_STRING once per item.  FMT_STRING receives the value followed by
+   a separator character: a space, or a newline after the last item.
+   A failed write is reported through error () with status 2.  */
+
+static void
+print_float (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const float *item = (const float *) block;
+  int left = n_bytes / sizeof (float);
+  int failed = 0;
+
+  for (; left > 0; left--, item++)
+    {
+      float value = *item;
+
+      if (printf (fmt_string, value, (left == 1 ? '\n' : ' ')) == EOF)
+        failed = 1;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+/* Print the N_BYTES bytes at BLOCK as doubles, calling printf with
+   FMT_STRING once per item.  FMT_STRING receives the value followed by
+   a separator character: a space, or a newline after the last item.
+   A failed write is reported through error () with status 2.  */
+
+static void
+print_double (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const double *item = (const double *) block;
+  int left = n_bytes / sizeof (double);
+  int failed = 0;
+
+  for (; left > 0; left--, item++)
+    {
+      double value = *item;
+
+      if (printf (fmt_string, value, (left == 1 ? '\n' : ' ')) == EOF)
+        failed = 1;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+#ifdef __GNUC__
+/* Print the N_BYTES bytes at BLOCK as LONG_DOUBLE values, calling
+   printf with FMT_STRING once per item.  FMT_STRING receives the value
+   followed by a separator character: a space, or a newline after the
+   last item.  A failed write is reported through error () with
+   status 2.  Only compiled when __GNUC__ is defined.  */
+
+static void
+print_long_double (n_bytes, block, fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *fmt_string;
+{
+  const LONG_DOUBLE *item = (const LONG_DOUBLE *) block;
+  int left = n_bytes / sizeof (LONG_DOUBLE);
+  int failed = 0;
+
+  for (; left > 0; left--, item++)
+    {
+      LONG_DOUBLE value = *item;
+
+      if (printf (fmt_string, value, (left == 1 ? '\n' : ' ')) == EOF)
+        failed = 1;
+    }
+
+  if (failed)
+    error (2, errno, "standard output");
+}
+
+#endif
+
+/* Print the N_BYTES bytes at BLOCK by name.  Each byte is masked to
+   its low 7 bits; codes 0 through 040 print as the matching entry of
+   `charname', 127 prints as "del", and anything else prints as the
+   character itself.  Every item is written in a field of width 3
+   followed by a space, or by a newline after the last item.  The
+   format string argument is not used.  A failed write is reported
+   through error () with status 2.  */
+
+static void
+print_named_ascii (n_bytes, block, unused_fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *unused_fmt_string;
+{
+  const unsigned char *item = (const unsigned char *) block;
+  int left;
+
+  for (left = n_bytes; left > 0; left--, item++)
+    {
+      unsigned int raw = *item;
+      unsigned int low7 = (0x7f & raw);
+      const char *name;
+      char buf[5];
+
+      if (low7 == 127)
+        name = "del";
+      else if (low7 > 040)
+        {
+          sprintf (buf, " %c", low7);
+          name = buf;
+        }
+      else
+        name = charname[low7];
+
+      if (printf ("%3s%c", name, (left == 1 ? '\n' : ' ')) == EOF)
+        error (2, errno, "standard output");
+    }
+}
+
+/* Print the N_BYTES bytes at BLOCK as characters.  The bytes NUL,
+   alert, backspace, form feed, newline, carriage return, tab and
+   vertical tab print as backslash escapes; other bytes for which
+   isprint is true print as themselves and the rest as three octal
+   digits.  Every item is written in a field of width 3 followed by a
+   space, or by a newline after the last item.  The format string
+   argument is not used.  A failed write is reported through error ()
+   with status 2.  */
+
+static void
+print_ascii (n_bytes, block, unused_fmt_string)
+     long unsigned int n_bytes;
+     const char *block;
+     const char *unused_fmt_string;
+{
+  const unsigned char *item = (const unsigned char *) block;
+  int left;
+
+  for (left = n_bytes; left > 0; left--, item++)
+    {
+      unsigned int c = *item;
+      const char *text;
+      char buf[5];
+
+      if (c == '\0')
+        text = " \\0";
+      else if (c == '\007')
+        text = " \\a";
+      else if (c == '\b')
+        text = " \\b";
+      else if (c == '\f')
+        text = " \\f";
+      else if (c == '\n')
+        text = " \\n";
+      else if (c == '\r')
+        text = " \\r";
+      else if (c == '\t')
+        text = " \\t";
+      else if (c == '\v')
+        text = " \\v";
+      else
+        {
+          sprintf (buf, (isprint (c) ? " %c" : "%03o"), c);
+          text = (const char *) buf;
+        }
+
+      if (printf ("%3s%c", text, (left == 1 ? '\n' : ' ')) == EOF)
+        error (2, errno, "standard output");
+    }
+}
+
+/* Convert a null-terminated (possibly zero-length) string S to an
+   unsigned long integer value.  If S points to a non-digit set *P to S,
+   *VAL to 0, and return 0.  Otherwise, accumulate the integer value of
+   the string of decimal digits.  If the string of digits represents a
+   value larger than ULONG_MAX, don't modify *VAL or *P and return
+   non-zero.  Otherwise, advance *P to the first non-digit after S, set
+   *VAL to the result of the conversion and return zero.  */
+
+static int
+simple_strtoul (s, p, val)
+     const char *s;
+     const char **p;
+     long unsigned int *val;
+{
+  unsigned long int sum;
+
+  sum = 0;
+  /* Go through unsigned char: handing a negative plain char to
+     isdigit is undefined.  */
+  while (isdigit ((unsigned char) *s))
+    {
+      unsigned int c = *s++ - '0';
+      /* True exactly when SUM * 10 + C would exceed ULONG_MAX.  */
+      if (sum > (ULONG_MAX - c) / 10)
+        return 1;
+      sum = sum * 10 + c;
+    }
+  *p = s;
+  *val = sum;
+  return 0;
+}
+
+/* If S points to a single valid POSIX-style od format string, put a
+   description of that format in *TSPEC, make *NEXT point at the character
+   following the just-decoded format (if NEXT is non-NULL), and return
+   zero.  If S is not valid, don't modify *NEXT or *TSPEC and return
+   non-zero.  For example, if S were "d4afL" *NEXT would be set to "afL"
+   and *TSPEC would be
+     {
+       fmt = SIGNED_DECIMAL;
+       size = INT or LONG; (whichever integral_type_size[4] resolves to)
+       print_function = print_int; (assuming size == INT)
+       fmt_string = "%011d%c";
+     }
+   The format string is obtained from xmalloc.  It is NULL for the `a'
+   and `c' formats, whose print functions do not use one.  */
+
+static int
+decode_one_format (s, next, tspec)
+     const char *s;
+     const char **next;
+     struct tspec *tspec;
+{
+  enum size_spec size_spec;
+  unsigned long int size;
+  enum output_format fmt;
+  const char *pre_fmt_string;
+  char *fmt_string;
+  void (*print_function) ();
+  const char *p;
+  unsigned int c;
+  int fp_width;
+  int fp_precision;
+
+  assert (tspec != NULL);
+
+  switch (*s)
+    {
+    case 'd':
+    case 'o':
+    case 'u':
+    case 'x':
+      c = *s;
+      ++s;
+      /* An optional size follows: a type letter or a byte count.  */
+      switch (*s)
+        {
+        case 'C':
+          ++s;
+          size = sizeof (char);
+          break;
+
+        case 'S':
+          ++s;
+          size = sizeof (short);
+          break;
+
+        case 'I':
+          ++s;
+          size = sizeof (int);
+          break;
+
+        case 'L':
+          ++s;
+          size = sizeof (long int);
+          break;
+
+        default:
+          if (simple_strtoul (s, &p, &size) != 0)
+            return 1;
+          if (p == s)
+            /* No size given: default to int.  */
+            size = sizeof (int);
+          else
+            {
+              if (size > MAX_INTEGRAL_TYPE_SIZE
+                  || integral_type_size[size] == NO_SIZE)
+                return 1;
+              s = p;
+            }
+          break;
+        }
+
+#define FMT_BYTES_ALLOCATED 9
+      fmt_string = xmalloc (FMT_BYTES_ALLOCATED);
+
+      size_spec = integral_type_size[size];
+
+      /* Build "%0<width>[l]<conv>%c": a zero-padded item followed by
+         the separator character supplied by the print function.  */
+      switch (c)
+        {
+        case 'd':
+          fmt = SIGNED_DECIMAL;
+          sprintf (fmt_string, "%%0%u%sd%%c",
+                   bytes_to_signed_dec_digits[size],
+                   (size_spec == LONG ? "l" : ""));
+          break;
+
+        case 'o':
+          fmt = OCTAL;
+          sprintf (fmt_string, "%%0%u%so%%c",
+                   bytes_to_oct_digits[size],
+                   (size_spec == LONG ? "l" : ""));
+          break;
+
+        case 'u':
+          fmt = UNSIGNED_DECIMAL;
+          sprintf (fmt_string, "%%0%u%su%%c",
+                   bytes_to_unsigned_dec_digits[size],
+                   (size_spec == LONG ? "l" : ""));
+          break;
+
+        case 'x':
+          fmt = HEXADECIMAL;
+          sprintf (fmt_string, "%%0%u%sx%%c",
+                   bytes_to_hex_digits[size],
+                   (size_spec == LONG ? "l" : ""));
+          break;
+
+        default:
+          abort ();
+        }
+
+      assert (strlen (fmt_string) < FMT_BYTES_ALLOCATED);
+
+      switch (size_spec)
+        {
+        case CHAR:
+          print_function = (fmt == SIGNED_DECIMAL
+                            ? print_s_char
+                            : print_char);
+          break;
+
+        case SHORT:
+          print_function = (fmt == SIGNED_DECIMAL
+                            ? print_s_short
+                            : print_short);
+          break;
+
+        case INT:
+          print_function = print_int;
+          break;
+
+        case LONG:
+          print_function = print_long;
+          break;
+
+        default:
+          abort ();
+        }
+      break;
+
+    case 'f':
+      fmt = FLOATING_POINT;
+      ++s;
+      switch (*s)
+        {
+        case 'F':
+          ++s;
+          size = sizeof (float);
+          break;
+
+        case 'D':
+          ++s;
+          size = sizeof (double);
+          break;
+
+        case 'L':
+          ++s;
+          size = sizeof (LONG_DOUBLE);
+          break;
+
+        default:
+          if (simple_strtoul (s, &p, &size) != 0)
+            return 1;
+          if (p == s)
+            /* No size given: default to double.  */
+            size = sizeof (double);
+          else
+            {
+              if (size > MAX_FP_TYPE_SIZE
+                  || fp_type_size[size] == NO_SIZE)
+                return 1;
+              s = p;
+            }
+          break;
+        }
+      size_spec = fp_type_size[size];
+
+      /* In a printf conversion specification the `#' flag has to come
+         before the field width, and a long double argument needs the
+         `L' length modifier.  */
+      switch (size_spec)
+        {
+        case FP_SINGLE:
+          print_function = print_float;
+          pre_fmt_string = "%%#%d.%de%%c";
+          fp_width = FLT_DIG + 8;
+          fp_precision = FLT_DIG;
+          break;
+
+        case FP_DOUBLE:
+          print_function = print_double;
+          pre_fmt_string = "%%#%d.%de%%c";
+          fp_width = DBL_DIG + 8;
+          fp_precision = DBL_DIG;
+          break;
+
+#ifdef __GNUC__
+        case FP_LONG_DOUBLE:
+          print_function = print_long_double;
+          pre_fmt_string = "%%#%d.%dLe%%c";
+          fp_width = LDBL_DIG + 8;
+          fp_precision = LDBL_DIG;
+          break;
+#endif
+
+        default:
+          abort ();
+        }
+
+      /* The template's length plus room for two fully expanded ints
+         (at most 3 digits per byte) and the terminating '\0' is a safe
+         upper bound on the length of the result.  */
+      fmt_string = xmalloc (strlen (pre_fmt_string)
+                            + 2 * 3 * sizeof (int) + 1);
+      sprintf (fmt_string, pre_fmt_string, fp_width, fp_precision);
+      break;
+
+    case 'a':
+      ++s;
+      fmt = NAMED_CHARACTER;
+      size_spec = CHAR;
+      fmt_string = NULL;
+      print_function = print_named_ascii;
+      break;
+
+    case 'c':
+      ++s;
+      fmt = CHARACTER;
+      size_spec = CHAR;
+      fmt_string = NULL;
+      print_function = print_ascii;
+      break;
+
+    default:
+      return 1;
+    }
+
+  tspec->size = size_spec;
+  tspec->fmt = fmt;
+  tspec->print_function = print_function;
+  tspec->fmt_string = fmt_string;
+
+  if (next != NULL)
+    *next = s;
+
+  return 0;
+}
+
+/* Decode every format in the POSIX-style od format string S, appending
+   one entry per format to the global array SPEC and enlarging SPEC
+   when it is full.  Return zero if all of S is valid.  Return non-zero
+   at the first invalid format; entries decoded before it stay in
+   SPEC.  */
+
+static int
+decode_format_string (s)
+     const char *s;
+{
+  const char *rest;
+
+  assert (s != NULL);
+
+  for (; *s != '\0'; s = rest)
+    {
+      struct tspec decoded;
+
+      if (decode_one_format (s, &rest, &decoded) != 0)
+        return 1;
+
+      /* A successful decode always consumes at least one character.  */
+      assert (s != rest);
+
+      if (n_specs >= n_specs_allocated)
+        {
+          /* Grow by half, plus one so that an empty array grows too.  */
+          n_specs_allocated = 1 + (3 * n_specs_allocated) / 2;
+          spec = (struct tspec *) xrealloc (spec, (n_specs_allocated
+                                                   * sizeof (struct tspec)));
+        }
+
+      bcopy ((char *) &decoded, (char *) &spec[n_specs],
+             sizeof (struct tspec));
+      ++n_specs;
+    }
+
+  return 0;
+}
+
+/* Given a list of one or more input filenames FILE_LIST, set the global
+   file pointer IN_STREAM to position N_SKIP in the concatenation of
+   those files.  If any file operation fails or if there are fewer than
+   N_SKIP bytes in the combined input, give an error message and exit.
+   When possible, use seek- rather than read operations to advance
+   IN_STREAM.  A file name of "-" is interpreted as standard input.
+   FILE_LIST and INPUT_FILENAME are left describing the file in which
+   position N_SKIP falls.  */
+
+static void
+skip (n_skip)
+     long unsigned int n_skip;
+{
+  for ( /* empty */ ; *file_list != NULL; ++file_list)
+    {
+      struct stat file_stats;
+
+      if (STREQ (*file_list, "-"))
+        {
+          input_filename = "standard input";
+          in_stream = stdin;
+        }
+      else
+        {
+          input_filename = *file_list;
+          in_stream = fopen (input_filename, "r");
+          if (in_stream == NULL)
+            error (2, errno, "%s", input_filename);
+        }
+
+      if (n_skip == 0)
+        break;
+
+      /* First try using fseek.  For large offsets, all this work is
+         worthwhile.  If the offset is below some threshold it may be
+         more efficient to move the pointer by reading.  There are two
+         issues when trying to use fseek:
+           - the file must be seekable.
+           - before seeking to the specified position, make sure
+             that the new position is in the current file.
+             Try to do that by getting file's size using stat().
+             But that will work only for regular files and dirs.  */
+
+      if (fstat (fileno (in_stream), &file_stats))
+        error (2, errno, "%s", input_filename);
+
+      /* The st_size field is valid only for regular files and
+         directories.  FIXME: is the preceding true?
+         If the number of bytes left to skip is at least as large as
+         the size of the current file, we can decrement
+         n_skip and go on to the next file.  */
+      if (S_ISREG (file_stats.st_mode) || S_ISDIR (file_stats.st_mode))
+        {
+          if (n_skip >= file_stats.st_size)
+            {
+              n_skip -= file_stats.st_size;
+              if (in_stream != stdin)
+                {
+                  if (fclose (in_stream))
+                    error (2, errno, "%s", input_filename);
+                }
+              continue;
+            }
+          else
+            {
+              if (fseek (in_stream, n_skip, SEEK_SET) == 0)
+                {
+                  n_skip = 0;
+                  break;
+                }
+            }
+        }
+
+      /* fseek didn't work or wasn't attempted; do it the slow way:
+         read and discard up to BUFSIZ bytes at a time.  Counting in
+         N_SKIP itself avoids squeezing the number of chunks into an
+         int.  */
+      while (n_skip > 0)
+        {
+          char buf[BUFSIZ];
+          size_t n_bytes_to_read = MIN (n_skip, BUFSIZ);
+          size_t n_bytes_read;
+
+          n_bytes_read = fread (buf, 1, n_bytes_to_read, in_stream);
+          n_skip -= n_bytes_read;
+          if (n_bytes_read != n_bytes_to_read)
+            {
+              if (ferror (in_stream))
+                error (2, errno, "%s", input_filename);
+              else
+                break;          /* End of this file.  */
+            }
+        }
+
+      if (n_skip == 0)
+        break;
+
+      /* This file ended before the skip was complete.  Close it before
+         moving on, as the seek path above does, so that the stream is
+         not leaked.  */
+      if (in_stream != stdin)
+        {
+          if (fclose (in_stream))
+            error (2, errno, "%s", input_filename);
+        }
+    }
+
+  if (n_skip != 0)
+    error (2, 0, "cannot skip past end of combined input");
+}
+
+/* Return ADDRESS formatted according to `output_address_fmt_string',
+   or the empty string when that format is NULL.  A non-empty result
+   lives in the static buffer `address_fmt_buffer' and is overwritten
+   by the next call.  */
+
+static const char *
+format_address (address)
+     long unsigned int address;
+{
+  if (output_address_fmt_string == NULL)
+    return "";
+
+  sprintf (address_fmt_buffer, output_address_fmt_string, address);
+  return address_fmt_buffer;
+}
+
+/* Write N_BYTES bytes from CURR_BLOCK to standard output once for each
+   of the N_SPECS format specs.  CURRENT_OFFSET is the byte address of
+   CURR_BLOCK in the concatenation of input files, and it is printed
+   (optionally) only before the output line associated with the first
+   format spec.  When duplicate blocks are being abbreviated, the output
+   for a sequence of identical input blocks is the output for the first
+   block followed by an asterisk alone on a line.  It is valid to compare
+   the blocks PREV_BLOCK and CURR_BLOCK only when N_BYTES == BYTES_PER_BLOCK.
+   That condition may be false only for the last input block -- and then
+   only when it has not been padded to length BYTES_PER_BLOCK.
+   A failed write is reported through error () with status 2.  */
+
+static void
+write_block (current_offset, n_bytes, prev_block, curr_block)
+     long unsigned int current_offset;
+     long unsigned int n_bytes;
+     const char *prev_block;
+     const char *curr_block;
+{
+  /* FIRST is true until one block has been handled: before that,
+     PREV_BLOCK holds nothing to compare against.  PREV_PAIR_EQUAL
+     records that the asterisk for the current run of duplicates has
+     already been printed.  */
+  static int first = 1;
+  static int prev_pair_equal = 0;
+
+#define EQUAL_BLOCKS(b1, b2) (bcmp ((b1), (b2), bytes_per_block) == 0)
+
+  if (abbreviate_duplicate_blocks
+      && !first && n_bytes == bytes_per_block
+      && EQUAL_BLOCKS (prev_block, curr_block))
+    {
+      if (prev_pair_equal)
+        {
+          /* The two preceding blocks were equal, and the current
+             block is the same as the last one, so print nothing.  */
+        }
+      else
+        {
+          /* Check this write like every other one in this file.  */
+          if (printf ("*\n") == EOF)
+            error (2, errno, "standard output");
+          prev_pair_equal = 1;
+        }
+    }
+  else
+    {
+      unsigned int i;
+
+      prev_pair_equal = 0;
+      for (i = 0; i < n_specs; i++)
+        {
+          /* Only the first line of a block carries the address; the
+             others are indented by the same amount.  */
+          if (printf ("%s ", (i == 0
+                              ? format_address (current_offset)
+                              : address_pad))
+              == EOF)
+            error (2, errno, "standard output");
+          (*spec[i].print_function) (n_bytes, curr_block, spec[i].fmt_string);
+        }
+    }
+  first = 0;
+}
+
+/* Read and return a single byte from the concatenation of the input
+   files named in the global array FILE_LIST.  On the first call to this
+   function, the global variable IN_STREAM is expected to be an open
+   stream associated with the input file *FILE_LIST.  If IN_STREAM is
+   at end-of-file, close it and update the global variables IN_STREAM,
+   FILE_LIST, and INPUT_FILENAME so they correspond to the next file in
+   the list.  Then try to read a byte from the newly opened file.
+   Repeat if necessary until *FILE_LIST is NULL.  Upon any read-, open-,
+   or close error give a message and exit.  When EOF is reached for the
+   last file in FILE_LIST, return EOF.  Any subsequent calls return EOF.  */
+
+static int
+read_char ()
+{
+  if (*file_list == NULL)
+    return EOF;
+
+  while (1)
+    {
+      int c;
+
+      c = fgetc (in_stream);
+
+      if (c != EOF)
+        return c;
+
+      /* fgetc returns EOF both at end of file and on a read error;
+         ferror tells the two apart.  Testing errno here would be
+         unreliable, since it may still hold a value left behind by an
+         earlier, unrelated call.  read_block makes the same check.  */
+      if (ferror (in_stream))
+        error (2, errno, "%s", input_filename);
+
+      if (in_stream != stdin)
+        if (fclose (in_stream) == EOF)
+          error (2, errno, "%s", input_filename);
+
+      ++file_list;
+      if (*file_list == NULL)
+        return EOF;
+
+      if (STREQ (*file_list, "-"))
+        {
+          input_filename = "standard input";
+          in_stream = stdin;
+        }
+      else
+        {
+          input_filename = *file_list;
+          in_stream = fopen (input_filename, "r");
+          if (in_stream == NULL)
+            error (2, errno, "%s", input_filename);
+        }
+    }
+}
+
+/* Fill BLOCK with N bytes taken from the concatenation of the input
+   files named in the global array FILE_LIST, and return the number of
+   bytes stored.  IN_STREAM must be open on *FILE_LIST when this is
+   first called.  If *FILE_LIST is already NULL, leave BLOCK alone and
+   return zero.  Whenever IN_STREAM cannot supply all the bytes still
+   needed, close it (unless it is stdin), advance FILE_LIST, update
+   INPUT_FILENAME and IN_STREAM for the next file, and keep reading
+   from there.  The result is smaller than N only when the last file
+   runs out.  Upon any read-, open-, or close error give a message and
+   exit.  */
+
+static unsigned long int
+read_block (n, block)
+     size_t n;
+     char *block;
+{
+  unsigned long int filled = 0;
+
+  assert (n > 0 && n <= bytes_per_block);
+  if (n == 0 || *file_list == NULL)
+    return 0;
+
+  for (;;)
+    {
+      size_t wanted = n - filled;
+      size_t got = fread (block + filled, 1, wanted, in_stream);
+
+      if (ferror (in_stream))
+        error (2, errno, "%s", input_filename);
+
+      if (got == wanted)
+        return n;
+
+      /* Short read without an error: this file is exhausted.  */
+      filled += got;
+
+      if (in_stream != stdin && fclose (in_stream) == EOF)
+        error (2, errno, "%s", input_filename);
+
+      ++file_list;
+      if (*file_list == NULL)
+        return filled;
+
+      if (STREQ (*file_list, "-"))
+        {
+          input_filename = "standard input";
+          in_stream = stdin;
+        }
+      else
+        {
+          input_filename = *file_list;
+          in_stream = fopen (input_filename, "r");
+          if (in_stream == NULL)
+            error (2, errno, "%s", input_filename);
+        }
+    }
+}
+
+/* Return the least common multiple of the item sizes, in bytes, of
+   all the format specs in SPEC.  The result is 1 when there are no
+   specs.  */
+
+static int
+get_lcm ()
+{
+  int result = 1;
+  int k = 0;
+
+  while (k < n_specs)
+    {
+      result = lcm (result, width_bytes[(int) spec[k].size]);
+      k++;
+    }
+  return result;
+}
+
+/* Read chunks of size BYTES_PER_BLOCK from the input files, write the
+   formatted block to standard output, and repeat until the specified
+   maximum number of bytes has been read or until all input has been
+   processed.  If the last block read is smaller than BYTES_PER_BLOCK
+   and its size is not a multiple of the size associated with a format
+   spec, extend the input block with zero bytes until its length is a
+   multiple of all format spec sizes.  Write the final block.  Finally,
+   write on a line by itself the offset of the byte after the last byte
+   read.  */
+
+static void
+dump ()
+{
+  /* Two buffers used alternately, so that the previous block stays
+     available for write_block's duplicate check.  */
+  char *block[2];
+  unsigned long int current_offset;
+  int idx = 0;
+  size_t n_bytes_read;
+
+  block[0] = (char *) alloca (bytes_per_block);
+  block[1] = (char *) alloca (bytes_per_block);
+
+  current_offset = n_bytes_to_skip;
+
+  if (limit_bytes_to_format)
+    {
+      size_t end_offset = n_bytes_to_skip + max_bytes_to_format;
+
+      while (1)
+        {
+          size_t n_needed;
+
+          if (current_offset >= end_offset)
+            {
+              /* The limit was reached exactly on a block boundary, so
+                 no partial block is pending.  Without this reset the
+                 count of the last full block would make the code below
+                 print a stale block a second time.  */
+              n_bytes_read = 0;
+              break;
+            }
+          n_needed = MIN (end_offset - current_offset, bytes_per_block);
+          n_bytes_read = read_block (n_needed, block[idx]);
+          if (n_bytes_read < bytes_per_block)
+            break;
+          assert (n_bytes_read == bytes_per_block);
+          write_block (current_offset, n_bytes_read,
+                       block[!idx], block[idx]);
+          current_offset += n_bytes_read;
+          idx = !idx;
+        }
+    }
+  else
+    {
+      while (1)
+        {
+          n_bytes_read = read_block (bytes_per_block, block[idx]);
+          if (n_bytes_read < bytes_per_block)
+            break;
+          assert (n_bytes_read == bytes_per_block);
+          write_block (current_offset, n_bytes_read,
+                       block[!idx], block[idx]);
+          current_offset += n_bytes_read;
+          idx = !idx;
+        }
+    }
+
+  /* Here N_BYTES_READ is the size of a trailing partial block, or zero
+     if there is none.  */
+  if (n_bytes_read > 0)
+    {
+      int l_c_m;
+      size_t bytes_to_write;
+
+      l_c_m = get_lcm ();
+
+      /* Make bytes_to_write the smallest multiple of l_c_m that
+         is at least as large as n_bytes_read.  */
+      bytes_to_write = l_c_m * (int) ((n_bytes_read + l_c_m - 1) / l_c_m);
+
+      bzero (block[idx] + n_bytes_read, bytes_to_write - n_bytes_read);
+      write_block (current_offset, bytes_to_write,
+                   block[!idx], block[idx]);
+      /* Advance by the bytes really read, not by the padded length.  */
+      current_offset += n_bytes_read;
+    }
+
+  if (output_address_fmt_string != NULL)
+    {
+      if (printf ("%s\n", format_address (current_offset)) == EOF)
+        error (2, errno, "standard output");
+    }
+}
+
+/* STRINGS mode.  Find each "string constant" in the file.
+   A string constant is a run of at least `string_min' characters for
+   which isprint is true, terminated by a null.  Each one is written on
+   a line of its own, preceded by its address when an address format is
+   in effect.  Based on a function written by Richard Stallman for a
+   pre-POSIX version of od.
+   NOTE(review): when the byte limit ends the scan in the middle of a
+   run, the run is printed although no null was seen, and the address
+   printed for it is then one too small -- confirm whether that is
+   intended.  */
+
+static void
+dump_strings ()
+{
+  int bufsize = MAX (100, string_min);
+  char *buf = xmalloc (bufsize);
+  unsigned long address = n_bytes_to_skip;
+
+  while (1)
+    {
+      int i;
+      int c;
+
+      /* See if the next `string_min' chars are all printing chars.  */
+    tryline:
+
+      if (limit_bytes_to_format
+          && address >= (n_bytes_to_skip + max_bytes_to_format - string_min))
+        break;
+
+      for (i = 0; i < string_min; i++)
+        {
+          c = read_char ();
+          address++;
+          if (c < 0)
+            {
+              free (buf);
+              return;
+            }
+          if (!isprint (c))
+            /* Found a non-printing.  Try again starting with next char.  */
+            goto tryline;
+          buf[i] = c;
+        }
+
+      /* We found a run of `string_min' printable characters.
+         Now see if it is terminated with a null byte.  */
+      while (!limit_bytes_to_format
+             || address < n_bytes_to_skip + max_bytes_to_format)
+        {
+          if (i == bufsize)
+            {
+              bufsize = 1 + 3 * bufsize / 2;
+              buf = xrealloc (buf, bufsize);
+            }
+          c = read_char ();
+          address++;
+          if (c < 0)
+            {
+              free (buf);
+              return;
+            }
+          if (c == '\0')
+            break;              /* It is; print this string.  */
+          if (!isprint (c))
+            goto tryline;       /* It isn't; give up on this string.  */
+          buf[i++] = c;         /* String continues; store it all.  */
+        }
+
+      /* The loop above makes room for one more byte only at the top of
+         an iteration.  If it ended because the byte limit was reached
+         (or never ran), the buffer may be exactly full, so make room
+         for the terminator before storing it.  */
+      if (i == bufsize)
+        {
+          bufsize = 1 + 3 * bufsize / 2;
+          buf = xrealloc (buf, bufsize);
+        }
+
+      /* The string is all in `buf' and `i' is its length; print it.  */
+      buf[i] = 0;
+      if (output_address_fmt_string != NULL)
+        {
+          if (printf ("%s ", format_address (address - i - 1)) == EOF)
+            error (2, errno, "standard output");
+        }
+      for (i = 0; (c = buf[i]); i++)
+        {
+          int err;
+          switch (c)
+            {
+            case '\007':
+              err = fputs ("\\a", stdout);
+              break;
+
+            case '\b':
+              err = fputs ("\\b", stdout);
+              break;
+
+            case '\f':
+              err = fputs ("\\f", stdout);
+              break;
+
+            case '\n':
+              err = fputs ("\\n", stdout);
+              break;
+
+            case '\r':
+              err = fputs ("\\r", stdout);
+              break;
+
+            case '\t':
+              err = fputs ("\\t", stdout);
+              break;
+
+            case '\v':
+              err = fputs ("\\v", stdout);
+              break;
+
+            default:
+              err = putchar (c);
+            }
+          if (err == EOF)
+            error (2, errno, "standard output");
+        }
+      if (putchar ('\n') == EOF)
+        error (2, errno, "standard output");
+    }
+  free (buf);
+}
+
+/* Entry point of od.  Parse the command line, select the address radix
+   and the list of output format specs, compute the output block size,
+   skip the bytes requested with -j, and then dump the input either as
+   strings (-s) or as formatted blocks.  Finishes by calling exit (0).  */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int n_files;
+  int i;
+  unsigned int l_c_m;
+  unsigned int address_pad_len;
+  unsigned long int desired_width;
+  int width_specified = 0;
+
+  program_name = argv[0];
+
+  /* Build the tables mapping a size in bytes to the C type of that
+     size on this host.  */
+  for (i = 0; i <= MAX_INTEGRAL_TYPE_SIZE; i++)
+    integral_type_size[i] = NO_SIZE;
+
+  integral_type_size[sizeof (char)] = CHAR;
+  integral_type_size[sizeof (short int)] = SHORT;
+  integral_type_size[sizeof (int)] = INT;
+  integral_type_size[sizeof (long int)] = LONG;
+
+  for (i = 0; i <= MAX_FP_TYPE_SIZE; i++)
+    fp_type_size[i] = NO_SIZE;
+
+  fp_type_size[sizeof (float)] = FP_SINGLE;
+  /* The array entry for `double' is filled in after that for LONG_DOUBLE
+     so that if `long double' is the same type or if long double isn't
+     supported FP_LONG_DOUBLE will never be used.  */
+  fp_type_size[sizeof (LONG_DOUBLE)] = FP_LONG_DOUBLE;
+  fp_type_size[sizeof (double)] = FP_DOUBLE;
+
+  n_specs = 0;
+  n_specs_allocated = 5;
+  spec = (struct tspec *) xmalloc (n_specs_allocated * sizeof (struct tspec));
+
+  /* Defaults: seven-digit octal addresses, block (not string) dump.  */
+  output_address_fmt_string = "%07o";
+  address_pad_len = 7;
+  flag_dump_strings = 0;
+
+  while ((c = getopt_long (argc, argv, "abcdfhilos::xw::A:j:N:t:v",
+                           long_options, (int *) 0))
+         != EOF)
+    {
+      strtoul_error err;
+
+      switch (c)
+        {
+        case 'A':
+          /* Only the first character of the argument is examined.  */
+          switch (optarg[0])
+            {
+            case 'd':
+              output_address_fmt_string = "%07d";
+              address_pad_len = 7;
+              break;
+            case 'o':
+              output_address_fmt_string = "%07o";
+              address_pad_len = 7;
+              break;
+            case 'x':
+              output_address_fmt_string = "%06x";
+              address_pad_len = 6;
+              break;
+            case 'n':
+              output_address_fmt_string = NULL;
+              address_pad_len = 0;
+              break;
+            default:
+              error (2, 0,
+                     "invalid output address radix `%c'; it must be one character from [doxn]",
+                     optarg[0]);
+              break;
+            }
+          break;
+
+        case 'j':
+          err = my_strtoul (optarg, 0, &n_bytes_to_skip, 1);
+          if (err != UINT_OK)
+            uint_fatal_error (optarg, "skip argument", err);
+          break;
+
+        case 'N':
+          limit_bytes_to_format = 1;
+
+          err = my_strtoul (optarg, 0, &max_bytes_to_format, 1);
+          if (err != UINT_OK)
+            uint_fatal_error (optarg, "limit argument", err);
+          break;
+
+        case 's':
+          /* The minimum string length is optional and defaults to 3.  */
+          if (optarg == NULL)
+            string_min = 3;
+          else
+            {
+              err = my_strtoul (optarg, 0, &string_min, 1);
+              if (err != UINT_OK)
+                uint_fatal_error (optarg, "minimum string length", err);
+            }
+          ++flag_dump_strings;
+          break;
+
+        case 't':
+          if (decode_format_string (optarg))
+            error (2, 0, "invalid type string `%s'", optarg);
+          break;
+
+        case 'v':
+          abbreviate_duplicate_blocks = 0;
+          break;
+
+          /* The next several cases map the old, pre-POSIX format
+             specification options to the corresponding POSIX format
+             specs.  GNU od accepts any combination of old- and
+             new-style options.  If only POSIX format specs are used
+             and more than one is used, they are accumulated.  If only
+             old-style options are used, all but the last are ignored.
+             If both types of specs are used in the same command, the
+             last old-style option and any POSIX specs following it
+             are accumulated.  To illustrate, `od -c -t a' is the same
+             as `od -t ca', but `od -t a -c' is the same as `od -c'.  */
+
+#define CASE_OLD_ARG(old_char,new_string)                               \
+        case old_char:                                                  \
+          {                                                             \
+            const char *next;                                           \
+            int tmp;                                                    \
+            assert (n_specs_allocated >= 1);                            \
+            tmp = decode_one_format (new_string, &next, &(spec[0]));    \
+            n_specs = 1;                                                \
+            assert (tmp == 0);                                          \
+            assert (*next == '\0');                                     \
+          }                                                             \
+          break
+
+          CASE_OLD_ARG ('a', "a");
+          CASE_OLD_ARG ('b', "oC");
+          CASE_OLD_ARG ('c', "c");
+          CASE_OLD_ARG ('d', "u2");
+          CASE_OLD_ARG ('f', "fF");
+          CASE_OLD_ARG ('h', "x2");
+          CASE_OLD_ARG ('i', "d2");
+          CASE_OLD_ARG ('l', "d4");
+          CASE_OLD_ARG ('o', "o2");
+          CASE_OLD_ARG ('x', "x2");
+
+#undef CASE_OLD_ARG
+
+        case 'w':
+          /* The width is optional and defaults to 32; it is checked
+             against the format specs after all options are parsed.  */
+          width_specified = 1;
+          if (optarg == NULL)
+            {
+              desired_width = 32;
+            }
+          else
+            {
+              err = my_strtoul (optarg, 10, &desired_width, 0);
+              if (err != UINT_OK)
+                error (2, 0, "invalid width specification `%s'", optarg);
+            }
+          break;
+
+        default:
+          usage ();
+          break;
+        }
+    }
+
+  if (flag_dump_strings && n_specs > 0)
+    error (2, 0, "no type may be specified when dumping strings");
+
+  /* Build the blank string that is as wide as a formatted address.  */
+  assert (address_pad_len <= MAX_ADDRESS_LENGTH);
+  for (i = 0; i < address_pad_len; i++)
+    address_pad[i] = ' ';
+  address_pad[address_pad_len] = '\0';
+
+  /* With no format given, fall back to the "o2" spec.  */
+  if (n_specs == 0)
+    {
+      int err = decode_one_format ("o2", NULL, &(spec[0]));
+
+      assert (err == 0);
+      n_specs = 1;
+    }
+
+  n_files = argc - optind;
+  if (n_files > 0)
+    file_list = (char const *const *) &argv[optind];
+  else
+    {
+      /* If no files were listed on the command line, set up the
+         global array FILE_LIST so that it contains the null-terminated
+         list of one name: "-".  */
+      static char const * const default_file_list[] = {"-", NULL};
+
+      file_list = default_file_list;
+    }
+
+  skip (n_bytes_to_skip);
+
+  /* Compute output block length.  */
+  l_c_m = get_lcm ();
+
+  if (width_specified)
+    {
+      /* A usable width is a nonzero multiple of L_C_M.  */
+      if (desired_width != 0 && desired_width % l_c_m == 0)
+        bytes_per_block = desired_width;
+      else
+        {
+          /* DESIRED_WIDTH is an unsigned long and L_C_M an unsigned int;
+             the conversions must match those types exactly because
+             the arguments are passed through a variable argument list.  */
+          error (0, 0, "warning: invalid width %lu; using %u instead",
+                 desired_width, l_c_m);
+          bytes_per_block = l_c_m;
+        }
+    }
+  else
+    {
+      /* Use the largest multiple of L_C_M that does not exceed the
+         default block size, or L_C_M itself if it is already larger.  */
+      if (l_c_m < DEFAULT_BYTES_PER_BLOCK)
+        bytes_per_block = l_c_m * (int) (DEFAULT_BYTES_PER_BLOCK / l_c_m);
+      else
+        bytes_per_block = l_c_m;
+    }
+
+#ifdef DEBUG
+  for (i = 0; i < n_specs; i++)
+    {
+      printf ("%d: fmt=\"%s\" width=%d\n",
+              i, spec[i].fmt_string, width_bytes[spec[i].size]);
+    }
+#endif
+
+  if (flag_dump_strings)
+    {
+      dump_strings ();
+    }
+  else
+    {
+      dump ();
+    }
+
+  exit (0);
+}
diff --git a/src/paste.c b/src/paste.c
new file mode 100644
index 000000000..c7058a63c
--- /dev/null
+++ b/src/paste.c
@@ -0,0 +1,458 @@
+/* paste - merge lines of files
+ Copyright (C) 1984 by David M. Ihnat
+
+ This program is a total rewrite of the Bell Laboratories Unix(Tm)
+ command of the same name, as of System V. It contains no proprietary
+ code, and therefore may be used without violation of any proprietary
+ agreements whatsoever. However, you will notice that the program is
+ copyrighted by me. This is to assure the program does *not* fall
+ into the public domain. Thus, I may specify just what I am now:
+ This program may be freely copied and distributed, provided this notice
+ remains; it may not be sold for profit without express written consent of
+ the author.
+ Please note that I recreated the behavior of the Unix(Tm) 'paste' command
+ as faithfully as possible, with minor exceptions; however,
+ I haven't run a full set of regression tests. Thus, the user of
+ this program accepts full responsibility for any effects or loss;
+ in particular, the author is not responsible for any losses,
+ explicit or incidental, that may be incurred through use of this program.
+
+ I ask that any bugs (and, if possible, fixes) be reported to me when
+ possible. -David Ihnat (312) 784-4544 ignatz@homebru.chi.il.us
+
+ The list of valid escape sequences has been expanded over the Unix
+ version, to include \b, \f, \r, and \v.
+
+ POSIX changes, bug fixes, long-named options, and cleanup
+ by David MacKenzie <djm@ai.mit.edu>.
+
+ Options:
+ --serial
+ -s Paste one file at a time rather than
+ one line from each file.
+ --delimiters=delim-list
+ -d delim-list Consecutively use the characters in
+ DELIM-LIST instead of tab to separate
+ merged lines. When DELIM-LIST is exhausted,
+ start again at its beginning.
+ A FILE of `-' means standard input.
+ If no FILEs are given, standard input is used. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+char *collapse_escapes ();
+char *xmalloc ();
+char *xrealloc ();
+int paste_parallel ();
+int paste_serial ();
+void error ();
+void usage ();
+
+/* Indicates that no delimiter should be added in the current position. */
+#define EMPTY_DELIM '\0'
+
+/* Element marking a file that has reached EOF and been closed. */
+#define CLOSED ((FILE *) -1)
+
+/* Element marking end of list of open files. */
+#define ENDLIST ((FILE *) -2)
+
+/* Name this program was run with. */
+char *program_name;
+
+/* If nonzero, we have read standard input at some point. */
+int have_read_stdin;
+
+/* If nonzero, merge subsequent lines of each file rather than
+ corresponding lines from each file in parallel. */
+int serial_merge;
+
+/* The delimiters between lines of input files (used cyclically). */
+char *delims;
+
+/* A pointer to the character after the end of `delims'. */
+char *delim_end;
+
+struct option longopts[] =
+{
+ {"serial", 0, 0, 's'},
+ {"delimiters", 1, 0, 'd'},
+ {0, 0, 0, 0}
+};
+
+void
+main (argc, argv)
+ int argc;
+ char **argv;
+{ /* Entry point of paste: parse -d and -s, expand the delimiter list,
+ paste the named files in parallel or serially, then close the
+ standard streams and exit with the paste function's status. */
+ int optc, exit_status;
+ char default_delims[2], zero_delims[3]; /* Writable storage for the
+ default list "\t" and for the two-character escape `\0' that
+ stands in for an empty -d argument. */
+
+ program_name = argv[0];
+ have_read_stdin = 0;
+ serial_merge = 0;
+ delims = default_delims;
+ strcpy (delims, "\t");
+ strcpy (zero_delims, "\\0");
+
+ while ((optc = getopt_long (argc, argv, "d:s", longopts, (int *) 0))
+ != EOF)
+ {
+ switch (optc)
+ {
+ case 'd':
+ /* Delimiter character(s). */
+ if (optarg[0] == '\0')
+ optarg = zero_delims; /* Empty list: use the "no delimiter" escape. */
+ delims = optarg;
+ break;
+
+ case 's':
+ serial_merge++;
+ break;
+
+ default:
+ usage ();
+ }
+ }
+
+ if (optind == argc)
+ argv[argc++] = "-"; /* No file operands: supply the single name `-',
+ which the paste functions read as standard input.
+ This stores over the argv[argc] slot. */
+
+ delim_end = collapse_escapes (delims); /* Rewrites `delims' in place and
+ returns the address just past
+ the last delimiter. */
+
+ if (!serial_merge)
+ exit_status = paste_parallel (argc - optind, &argv[optind]);
+ else
+ exit_status = paste_serial (argc - optind, &argv[optind]);
+ if (have_read_stdin && fclose (stdin) == EOF)
+ error (1, errno, "-");
+ if (ferror (stdout) || fclose (stdout) == EOF)
+ error (1, errno, "write error");
+ exit (exit_status);
+}
+
+/* Replace backslash representations of special characters in
+   STRPTR with their actual values.
+   The set of possible backslash characters has been expanded beyond
+   that recognized by the Unix version.
+
+   The conversion is done in place; the result is never longer than
+   the input.  `\0' becomes EMPTY_DELIM, `\b', `\f', `\n', `\r', `\t'
+   and `\v' become the corresponding control characters, and a
+   backslash followed by any other character yields that character.
+   A lone backslash at the very end of the string is kept as a
+   literal backslash.
+
+   Return a pointer to the character after the new end of STRPTR.
+   No terminating null is written there; callers use the returned
+   pointer as the end marker.  */
+
+char *
+collapse_escapes (strptr)
+     char *strptr;
+{
+  register char *strout;
+
+  strout = strptr;              /* Start at the same place, anyway. */
+
+  while (*strptr)
+    {
+      if (*strptr != '\\')      /* Is it an escape character? */
+        *strout++ = *strptr++;  /* No, just transfer it. */
+      else
+        {
+          switch (*++strptr)
+            {
+            case '\0':
+              /* The backslash was the last character.  Stop here:
+                 stepping past the terminating null, as the other
+                 cases do, would make the loop read beyond the end
+                 of the string.  */
+              *strout++ = '\\';
+              return strout;
+
+            case '0':
+              *strout++ = EMPTY_DELIM;
+              break;
+
+            case 'b':
+              *strout++ = '\b';
+              break;
+
+            case 'f':
+              *strout++ = '\f';
+              break;
+
+            case 'n':
+              *strout++ = '\n';
+              break;
+
+            case 'r':
+              *strout++ = '\r';
+              break;
+
+            case 't':
+              *strout++ = '\t';
+              break;
+
+            case 'v':
+              *strout++ = '\v';
+              break;
+
+            default:
+              *strout++ = *strptr;
+              break;
+            }
+          strptr++;
+        }
+    }
+  return strout;
+}
+
+/* Perform column paste on the NFILES files named in FNAMPTR.
+   Return 0 if no errors, 1 if one or more files could not be
+   read or closed.
+
+   One output line is built from the next line of every file, with
+   the delimiters from `delims' (used cyclically, restarting on each
+   output line) between the columns.  Files that have reached EOF
+   contribute empty columns until every file is exhausted.  A final
+   input line that lacks its newline is treated as if it had one.
+
+   A file name of `-' means standard input.  Failure to open a file,
+   or a named file being opened on file descriptor 0 while `-' is also
+   given, is reported through error () with status 1.  */
+
+int
+paste_parallel (nfiles, fnamptr)
+     int nfiles;
+     char **fnamptr;
+{
+  int errors = 0;               /* 1 if open or read errors occur. */
+  /* Number of files for which space is allocated in `delbuf' and `fileptr'.
+     Enlarged as necessary. */
+  int file_list_size = 12;
+  int chr;                      /* Input character. */
+  int line_length;              /* Number of chars in line. */
+  int somedone;                 /* 0 if all files empty for this line. */
+  /* If all files are just ready to be closed, or will be on this
+     round, the string of delimiters must be preserved.
+     delbuf[0] through delbuf[file_list_size]
+     store the delimiters for closed files. */
+  char *delbuf;
+  int delims_saved;             /* Number of delims saved in `delbuf'. */
+  register char *delimptr;      /* Cycling pointer into `delims'. */
+  FILE **fileptr;               /* Streams open to the files to process. */
+  int files_open;               /* Number of files still open to process. */
+  int i;                        /* Loop index. */
+  int opened_stdin = 0;         /* Nonzero if any fopen got fd 0. */
+
+  delbuf = (char *) xmalloc (file_list_size + 2);
+  fileptr = (FILE **) xmalloc ((file_list_size + 1) * sizeof (FILE *));
+
+  /* Attempt to open all files.  This could be expanded to an infinite
+     number of files, but at the (considerable) expense of remembering
+     each file and its current offset, then opening/reading/closing.  */
+
+  for (files_open = 0; files_open < nfiles; ++files_open)
+    {
+      if (files_open == file_list_size - 2)
+        {
+          file_list_size += 12;
+          delbuf = (char *) xrealloc (delbuf, file_list_size + 2);
+          fileptr = (FILE **) xrealloc (fileptr, (file_list_size + 1)
+                                        * sizeof (FILE *));
+        }
+      if (!strcmp (fnamptr[files_open], "-"))
+        {
+          have_read_stdin = 1;
+          fileptr[files_open] = stdin;
+        }
+      else
+        {
+          fileptr[files_open] = fopen (fnamptr[files_open], "r");
+          if (fileptr[files_open] == NULL)
+            error (1, errno, "%s", fnamptr[files_open]);
+          else if (fileno (fileptr[files_open]) == 0)
+            opened_stdin = 1;
+        }
+    }
+
+  /* Terminate the stream list with a sentinel.  */
+  fileptr[files_open] = ENDLIST;
+
+  if (opened_stdin && have_read_stdin)
+    error (1, 0, "standard input is closed");
+
+  /* Read a line from each file and output it to stdout separated by a
+     delimiter, until we go through the loop without successfully
+     reading from any of the files. */
+
+  while (files_open)
+    {
+      /* Set up for the next line. */
+      somedone = 0;
+      delimptr = delims;
+      delims_saved = 0;
+
+      for (i = 0; fileptr[i] != ENDLIST && files_open; i++)
+        {
+          line_length = 0;      /* Clear so we can easily detect EOF. */
+          if (fileptr[i] != CLOSED)
+            {
+              chr = getc (fileptr[i]);
+              /* Data follows: the delimiters held back for the empty
+                 columns to its left must be written first.  */
+              if (chr != EOF && delims_saved)
+                {
+                  fwrite (delbuf, sizeof (char), delims_saved, stdout);
+                  delims_saved = 0;
+                }
+
+              /* Copy the line, leaving its newline (if any) in CHR.  */
+              while (chr != EOF)
+                {
+                  line_length++;
+                  if (chr == '\n')
+                    break;
+                  putc (chr, stdout);
+                  chr = getc (fileptr[i]);
+                }
+            }
+
+          if (line_length == 0)
+            {
+              /* EOF, read error, or closed file.
+                 If an EOF or error, close the file and mark it in the list. */
+              if (fileptr[i] != CLOSED)
+                {
+                  if (ferror (fileptr[i]))
+                    {
+                      error (0, errno, "%s", fnamptr[i]);
+                      errors = 1;
+                    }
+                  if (fileptr[i] == stdin)
+                    clearerr (fileptr[i]);      /* Also clear EOF. */
+                  else if (fclose (fileptr[i]) == EOF)
+                    {
+                      error (0, errno, "%s", fnamptr[i]);
+                      errors = 1;
+                    }
+
+                  fileptr[i] = CLOSED;
+                  files_open--;
+                }
+
+              if (fileptr[i + 1] == ENDLIST)
+                {
+                  /* End of this output line.
+                     Is this the end of the whole thing? */
+                  if (somedone)
+                    {
+                      /* No.  Some files were not closed for this line. */
+                      if (delims_saved)
+                        {
+                          fwrite (delbuf, sizeof (char), delims_saved, stdout);
+                          delims_saved = 0;
+                        }
+                      putc ('\n', stdout);
+                    }
+                  continue;     /* Next read of files, or exit. */
+                }
+              else
+                {
+                  /* Closed file; add delimiter to `delbuf'. */
+                  if (*delimptr != EMPTY_DELIM)
+                    delbuf[delims_saved++] = *delimptr;
+                  if (++delimptr == delim_end)
+                    delimptr = delims;
+                }
+            }
+          else
+            {
+              /* Some data read. */
+              somedone++;
+
+              /* Except for last file, replace last newline with delim. */
+              if (fileptr[i + 1] != ENDLIST)
+                {
+                  /* CHR is either the newline that ended the line or,
+                     for a final line without one, EOF.  Neither may be
+                     copied: writing EOF would emit a stray 0xFF byte.  */
+                  if (chr != '\n' && chr != EOF)
+                    putc (chr, stdout);
+                  if (*delimptr != EMPTY_DELIM)
+                    putc (*delimptr, stdout);
+                  if (++delimptr == delim_end)
+                    delimptr = delims;
+                }
+              else
+                /* Last column: end the output line, supplying the
+                   newline when the input's final line lacks one.  */
+                putc (chr == EOF ? '\n' : chr, stdout);
+            }
+        }
+    }
+  return errors;
+}
+
+/* Perform serial paste on the NFILES files named in FNAMPTR.
+ Return 0 if no errors, 1 if one or more files could not be
+ opened or read.
+
+ Each file is written as one output line: every newline except the
+ file's last is replaced by the next delimiter from `delims' (used
+ cyclically, restarting for each file; EMPTY_DELIM writes nothing),
+ and a newline is written at the end of each file, including an
+ empty one. A file name of `-' means standard input. A file that
+ cannot be opened is reported and skipped. */
+
+int
+paste_serial (nfiles, fnamptr)
+ int nfiles;
+ char **fnamptr;
+{
+ int errors = 0; /* 1 if open or read errors occur. */
+ register int charnew, charold; /* Current and previous char read. */
+ register char *delimptr; /* Current delimiter char. */
+ register FILE *fileptr; /* Open for reading current file. */
+
+ for (; nfiles; nfiles--, fnamptr++)
+ {
+ if (!strcmp (*fnamptr, "-"))
+ {
+ have_read_stdin = 1;
+ fileptr = stdin;
+ }
+ else
+ {
+ fileptr = fopen (*fnamptr, "r");
+ if (fileptr == NULL)
+ {
+ error (0, errno, "%s", *fnamptr);
+ errors = 1;
+ continue;
+ }
+ }
+
+ delimptr = delims; /* Set up for delimiter string. */
+
+ charold = getc (fileptr);
+ if (charold != EOF)
+ {
+ /* `charold' is set up. Hit it!
+ Keep reading characters, stashing them in `charnew';
+ output `charold', converting to the appropriate delimiter
+ character if needed. After the EOF, output `charold'
+ if it's a newline; otherwise, output it and then a newline. */
+
+ while ((charnew = getc (fileptr)) != EOF)
+ {
+ /* Process the old character. */
+ if (charold == '\n')
+ {
+ if (*delimptr != EMPTY_DELIM)
+ putc (*delimptr, stdout);
+
+ if (++delimptr == delim_end)
+ delimptr = delims;
+ }
+ else
+ putc (charold, stdout);
+
+ charold = charnew;
+ }
+
+ /* Hit EOF. Process that last character. */
+ putc (charold, stdout);
+ }
+
+ if (charold != '\n')
+ putc ('\n', stdout); /* Empty file, or last line had no newline. */
+
+ if (ferror (fileptr))
+ {
+ error (0, errno, "%s", *fnamptr);
+ errors = 1;
+ }
+ if (fileptr == stdin)
+ clearerr (fileptr); /* Also clear EOF. */
+ else if (fclose (fileptr) == EOF)
+ {
+ error (0, errno, "%s", *fnamptr);
+ errors = 1;
+ }
+ }
+ return errors;
+}
+
+void
+usage ()
+{ /* Print a one-line synopsis of paste's options on standard error. */
+ fprintf (stderr, "\
+Usage: %s [-s] [-d delim-list] [--serial] [--delimiters=delim-list]\n\
+ [file...]\n",
+ program_name);
+ exit (1); /* Does not return; a usage error always exits with status 1. */
+}
diff --git a/src/pr.c b/src/pr.c
new file mode 100644
index 000000000..10595ad73
--- /dev/null
+++ b/src/pr.c
@@ -0,0 +1,1844 @@
+/* pr -- convert text files for printing.
+ Copyright (C) 1988, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Author: Pete TerMaat. */
+
+/* Things to watch: Sys V screws up on ...
+ pr -n -3 -s: /usr/dict/words
+ pr -m -o10 -n /usr/dict/words{,,,}
+ pr -6 -a -n -o5 /usr/dict/words
+
+ Ideas:
+
+ Keep a things_to_do list of functions to call when we know we have
+ something to print. Cleaner than current series of checks.
+
+ Improve the printing of control prefixes.
+
+
+ Options:
+
+ +PAGE Begin output at page PAGE of the output.
+
+ -COLUMN Produce output that is COLUMN columns wide and print
+ columns down.
+
+ -a Print columns across rather than down. The input
+ one
+ two
+ three
+ four
+ will be printed as
+ one two three
+ four
+
+ -b Balance columns on the last page.
+
+ -c Print unprintable characters as control prefixes.
+ Control-g is printed as ^G.
+
+ -d Double space the output.
+
+ -e[c[k]] Expand tabs to spaces on input. Optional argument C
+ is the input tab character. (Default is `\t'.) Optional
+ argument K is the input tab character's width. (Default is 8.)
+
+ -F
+ -f Use formfeeds instead of newlines to separate pages.
+
+ -h header Replace the filename in the header with the string HEADER.
+
+ -i[c[k]] Replace spaces with tabs on output. Optional argument
+ C is the output tab character. (Default is `\t'.) Optional
+ argument K is the output tab character's width. (Default
+ is 8.)
+
+ -l lines Set the page length to LINES. Default is 66.
+
+ -m Print files in parallel.
+
+ -n[c[k]] Precede each column with a line number.
+ (With parallel files, precede each line with a line
+ number.) Optional argument C is the character to print
+ after each number. (Default `\t'.) Optional argument
+ K is the number of digits per line number. (Default 5.)
+
+ -o offset Offset each line with a margin OFFSET spaces wide.
+ Total page width is the size of this offset plus the
+ width set with `-w'.
+
+ -r Ignore files that can't be opened.
+
+ -s[c] Separate each line with a character. Optional argument C is
+ the character to be used. Default is `\t'.
+
+ -t Do not print headers or footers.
+
+ -v Print unprintable characters as escape sequences.
+ Control-G becomes \007.
+
+ -w width Set the page width to WIDTH characters. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include <time.h>
+#include "system.h"
+
+#ifdef isascii
+#define ISPRINT(c) (isascii (c) && isprint (c))
+#define ISDIGIT(c) (isascii (c) && isdigit (c))
+#else
+#define ISPRINT(c) isprint (c)
+#define ISDIGIT(c) isdigit (c)
+#endif
+
+int char_to_clump ();
+int read_line ();
+int print_page ();
+int print_stored ();
+char *xmalloc ();
+char *xrealloc ();
+int open_file ();
+int skip_to_page ();
+void error ();
+void getoptarg ();
+void usage ();
+void print_files ();
+void init_header ();
+void init_store_cols ();
+void store_columns ();
+void balance ();
+void store_char ();
+void pad_down ();
+void read_rest_of_line ();
+void print_char ();
+void cleanup ();
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+/* Used with start_position in the struct COLUMN described below.
+ If start_position == ANYWHERE, we aren't truncating columns and
+ can begin printing a column anywhere. Otherwise we must pad to
+ the horizontal position start_position. */
+#define ANYWHERE 0
+
+/* Each column has one of these structures allocated for it.
+ If we're only dealing with one file, fp is the same for all
+ columns.
+
+ The general strategy is to spend time setting up these column
+ structures (storing columns if necessary), after which printing
+ is a matter of flitting from column to column and calling
+ print_func.
+
+ Parallel files, single files printing across in multiple
+ columns, and single files printing down in multiple columns all
+ fit the same printing loop.
+
+ fp Input stream for this column.
+
+ name File name.
+
+ status Status of the file pointer: OPEN, ON_HOLD (a form
+ feed was hit), or CLOSED.
+
+ print_func Function used to print lines in this column.
+ If we're storing this column it will be
+ print_stored(), otherwise it will be read_line().
+
+ char_func Function used to process characters in this column.
+ If we're storing this column it will be store_char(),
+ otherwise it will be print_char().
+
+ current_line Index of the current entry in line_vector, which
+ contains the index of the first character of the
+ current line in buff[].
+
+ lines_stored Number of lines in this column which are stored in
+ buff.
+
+ lines_to_print If we're storing this column, lines_to_print is
+ the number of stored_lines which remain to be
+ printed. Otherwise it is the number of lines
+ we can print without exceeding lines_per_body.
+
+ start_position The horizontal position we want to be in before we
+ print the first character in this column.
+
+ numbered True means precede this column with a line number. */
+
+struct COLUMN
+{
+ FILE *fp; /* Input stream for this column. */
+ char *name; /* File name. */
+ enum
+ {
+ OPEN,
+ ON_HOLD, /* Hit a form feed. */
+ CLOSED
+ } status; /* Status of the file pointer. */
+ int (*print_func) (); /* Func to print lines in this col. */
+ void (*char_func) (); /* Func to print/store chars in this col. */
+ int current_line; /* Index of current place in line_vector. */
+ int lines_stored; /* Number of lines stored in buff. */
+ int lines_to_print; /* No. lines stored or space left on page. */
+ int start_position; /* Horizontal position of first char. */
+ int numbered; /* True means precede column with a line number. */
+};
+
+typedef struct COLUMN COLUMN;
+
+#define NULLCOL (COLUMN *)0
+
+/* All of the columns to print. */
+COLUMN *column_vector;
+
+/* When printing a single file in multiple downward columns,
+ we store the leftmost columns contiguously in buff.
+ To print a line from buff, get the index of the first char
+ from line_vector[i], and print up to line_vector[i + 1]. */
+char *buff;
+
+/* Index of the position in buff where the next character
+ will be stored. */
+int buff_current;
+
+/* The number of characters in buff.
+ Used for allocation of buff and to detect overflow of buff. */
+int buff_allocated;
+
+/* Array of indices into buff.
+ Each entry is an index of the first character of a line.
+ This is used when storing lines to facilitate shuffling when
+ we do column balancing on the last page. */
+int *line_vector;
+
+/* Array of horizontal positions.
+ For each line in line_vector, end_vector[line] is the horizontal
+ position we are in after printing that line. We keep track of this
+ so that we know how much we need to pad to prepare for the next
+ column. */
+int *end_vector;
+
+/* (-m) True means we're printing multiple files in parallel. */
+int parallel_files = FALSE;
+
+/* (-[0-9]+) True means we're given an option explicitly specifying
+ number of columns. Used to detect when this option is used with -m. */
+int explicit_columns = FALSE;
+
+/* (-t) True means we're printing headers and footers. */
+int extremities = TRUE;
+
+/* True means we need to print a header as soon as we know we've got input
+ to print after it. */
+int print_a_header;
+
+/* (-h) True means we're using the standard header rather than a
+ customized one specified by the -h flag. */
+int standard_header = TRUE;
+
+/* (-f) True means use formfeeds instead of newlines to separate pages. */
+int use_form_feed = FALSE;
+
+/* True means we haven't encountered any filenames in the argument list. */
+int input_is_stdin = TRUE;
+
+/* True means we have read the standard input. */
+int have_read_stdin = FALSE;
+
+/* True means the -a flag has been given. */
+int print_across_flag = FALSE;
+
+/* True means we're printing one file in multiple (>1) downward columns. */
+int storing_columns = TRUE;
+
+/* (-b) True means balance columns on the last page as Sys V does. */
+int balance_columns = FALSE;
+
+/* (-l) Number of lines on a page, including header and footer lines. */
+int lines_per_page = 66;
+
+/* Number of lines in the header and footer can be reset to 0 using
+ the -t flag. */
+int lines_per_header = 5;
+int lines_per_body;
+int lines_per_footer = 5;
+
+/* (-w) Width in characters of the page. Does not include the width of
+ the margin. */
+int chars_per_line = 72;
+
+/* Number of characters in a column. Based on the gutter and page widths. */
+int chars_per_column;
+
+/* (-e) True means convert tabs to spaces on input. */
+int untabify_input = FALSE;
+
+/* (-e) The input tab character. */
+char input_tab_char = '\t';
+
+/* (-e) Tabstops are at chars_per_input_tab, 2*chars_per_input_tab,
+ 3*chars_per_input_tab, ... where the leftmost column is 1. */
+int chars_per_input_tab = 8;
+
+/* (-i) True means convert spaces to tabs on output. */
+int tabify_output = FALSE;
+
+/* (-i) The output tab character. */
+char output_tab_char = '\t';
+
+/* (-i) The width of the output tab. */
+int chars_per_output_tab = 8;
+
+/* Keeps track of pending white space. When we hit a nonspace
+ character after some whitespace, we print whitespace, tabbing
+ if necessary to get to output_position + spaces_not_printed. */
+int spaces_not_printed;
+
+/* Number of spaces between columns (though tabs can be used when possible to
+ use up the equivalent amount of space). Not sure if this is worth making
+ a flag for. BSD uses 0, Sys V uses 1. Sys V looks better. */
+int chars_per_gutter = 1;
+
+/* (-o) Number of spaces in the left margin (tabs used when possible). */
+int chars_per_margin = 0;
+
+/* Position where the next character will fall.
+ Leftmost position is 0 + chars_per_margin.
+ Rightmost position is chars_per_margin + chars_per_line - 1.
+ This is important for converting spaces to tabs on output. */
+int output_position;
+
+/* Horizontal position relative to the current file.
+ (output_position depends on where we are on the page;
+ input_position depends on where we are in the file.)
+ Important for converting tabs to spaces on input. */
+int input_position;
+
+/* Count number of failed opens so we can exit with non-zero
+ status if there were any. */
+int failed_opens = 0;
+
+/* The horizontal position we'll be at after printing a tab character
+   of width c_ from the position h_.  Each argument is evaluated more
+   than once, so neither may have side effects.  The arguments and the
+   whole expansion are parenthesized so that the macro can be given
+   expressions and used inside larger expressions.  */
+#define pos_after_tab(c_, h_) ((h_) - (h_) % (c_) + (c_))
+
+/* The number of spaces taken up if we print a tab character with width
+   c_ from position h_.  Each argument is evaluated more than once, so
+   neither may have side effects.  */
+#define tab_width(c_, h_) ((c_) - (h_) % (c_))
+
+/* (-NNN) Number of columns of text to print. */
+int columns = 1;
+
+/* (+NNN) Page number on which to begin printing. */
+int first_page_number = 1;
+
+/* Number of files open (not closed, not on hold). */
+int files_ready_to_read = 0;
+
+/* Number of columns with either an open file or stored lines. */
+int cols_ready_to_print = 0;
+
+/* Current page number. Displayed in header. */
+int page_number;
+
+/* Current line number. Displayed when -n flag is specified.
+
+ When printing files in parallel (-m flag), line numbering is as follows:
+ 1 foo goo moo
+ 2 hoo too zoo
+
+ When printing files across (-a flag), ...
+ 1 foo 2 moo 3 goo
+ 4 hoo 5 too 6 zoo
+
+ Otherwise, line numbering is as follows:
+ 1 foo 3 goo 5 too
+ 2 moo 4 hoo 6 zoo */
+int line_number;
+
+/* (-n) True means lines should be preceded by numbers. */
+int numbered_lines = FALSE;
+
+/* True means print a number as soon as we know we'll be printing
+ from the current column. */
+int print_a_number;
+
+/* (-n) Character which follows each line number. */
+char number_separator = '\t';
+
+/* (-n) Width in characters of a line number. */
+int chars_per_number = 5;
+
+/* Used when widening the first column to accommodate numbers -- only
+ needed when printing files in parallel. Includes width of both the
+ number and the number_separator. */
+int number_width;
+
+/* Buffer sprintf uses to format a line number. */
+char *number_buff;
+
+/* (-v) True means unprintable characters are printed as escape sequences.
+ control-g becomes \007. */
+int use_esc_sequence = FALSE;
+
+/* (-c) True means unprintable characters are printed as control prefixes.
+ control-g becomes ^G. */
+int use_cntrl_prefix = FALSE;
+
+/* (-d) True means output is double spaced. */
+int double_space = FALSE;
+
+/* Number of files opened initially in init_files. Should be 1
+ unless we're printing multiple files in parallel. */
+int total_files = 0;
+
+/* (-r) True means don't complain if we can't open a file. */
+int ignore_failed_opens = FALSE;
+
+/* (-s) True means we separate columns with a specified character. */
+int use_column_separator = FALSE;
+
+/* Character used to separate columns if the -s flag has been specified. */
+char column_separator = '\t';
+
+/* Number of separator characters waiting to be printed as soon as we
+ know that we have any input remaining to be printed. */
+int separators_not_printed;
+
+/* Position we need to pad to, as soon as we know that we have input
+ remaining to be printed. */
+int padding_not_printed;
+
+/* True means we should pad the end of the page. Remains false until we
+ know we have a page to print. */
+int pad_vertically;
+
+/* (-h) String of characters used in place of the filename in the header. */
+char *custom_header;
+
+/* String containing the date, filename or custom header, and "Page ". */
+char *header;
+
+int *clump_buff;
+
+/* True means we truncate lines longer than chars_per_column. */
+int truncate_lines = FALSE;
+
+/* The name under which this program was invoked. */
+char *program_name;
+
+/* Program entry point.
+
+   Options and file names are processed strictly in command-line
+   order: the leading `-' in the getopt option string makes getopt
+   hand back each non-option argument as option code 1.  File names
+   are queued in file_name_vector; as soon as something other than a
+   file name follows (another option, or the end of the arguments) the
+   queued files are printed with the settings seen so far and the
+   queue is emptied.  A `+N' argument sets the first page to print,
+   and a run of digit options (-3, -12, ...) is accumulated into the
+   column count.
+
+   If input_is_stdin is still set once all arguments are consumed (it
+   is cleared after a batch of files has been printed), standard input
+   is printed.  The process exits with status 1 when at least one
+   open failed, 0 otherwise; stream errors are reported through
+   error (), and usage errors leave through usage ().  */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  char *s;
+  int files = 0;
+  char **file_names, **file_name_vector;
+  int accum = 0;
+
+  program_name = argv[0];
+
+  /* At most argc file names can be queued at once.  The elements are
+     `char *', so that is the size to multiply by.  */
+  file_name_vector = (char **) xmalloc (argc * sizeof (char *));
+  file_names = file_name_vector;
+
+  for (;;)
+    {
+      c = getopt (argc, argv, "-0123456789abcde::fFh:i::l:mn::o:rs::tvw:");
+
+      if (c == 1)               /* Non-option argument. */
+        {
+          s = optarg;
+          if (*s == '+')
+            {
+              if (!ISDIGIT (*++s))
+                usage ("`+' requires a numeric argument");
+              first_page_number = atoi (s);
+            }
+          else
+            {
+              *file_names++ = optarg;
+              ++files;
+            }
+        }
+      else if (files > 0)
+        {
+          /* Something other than a file name follows queued files:
+             print the queue with the options in effect so far.  */
+          if (parallel_files && explicit_columns)
+            error (1, 0,
+"Cannot specify number of columns when printing in parallel.");
+
+          if (parallel_files && print_across_flag)
+            error (1, 0,
+"Cannot specify both printing across and printing in parallel.");
+
+          if (parallel_files)
+            print_files (files, file_name_vector);
+          else
+            {
+              file_names = file_name_vector;
+              while (files--)
+                print_files (1, file_names++);
+            }
+
+          input_is_stdin = FALSE;
+          file_names = file_name_vector;
+          files = 0;
+          cleanup ();
+        }
+
+      /* Digit options build up the column count one digit at a time;
+         the count takes effect when the first non-digit is seen.  */
+      if (ISDIGIT (c))
+        {
+          accum = accum * 10 + c - '0';
+          continue;
+        }
+      else
+        {
+          if (accum > 0)
+            {
+              columns = accum;
+              explicit_columns = TRUE;
+            }
+          accum = 0;
+        }
+
+      switch (c)
+        {
+        case 'a':
+          print_across_flag = TRUE;
+          storing_columns = FALSE;
+          break;
+        case 'b':
+          balance_columns = TRUE;
+          break;
+        case 'c':
+          use_cntrl_prefix = TRUE;
+          break;
+        case 'd':
+          double_space = TRUE;
+          break;
+        case 'e':
+          if (optarg)
+            getoptarg (optarg, 'e', &input_tab_char,
+                       &chars_per_input_tab);
+          /* Could check tab width > 0. */
+          untabify_input = TRUE;
+          break;
+        case 'f':
+        case 'F':
+          use_form_feed = TRUE;
+          break;
+        case 'h':
+          custom_header = optarg;
+          standard_header = FALSE;
+          break;
+        case 'i':
+          if (optarg)
+            getoptarg (optarg, 'i', &output_tab_char,
+                       &chars_per_output_tab);
+          /* Could check tab width > 0. */
+          tabify_output = TRUE;
+          break;
+        case 'l':
+          lines_per_page = atoi (optarg);
+          break;
+        case 'm':
+          parallel_files = TRUE;
+          storing_columns = FALSE;
+          break;
+        case 'n':
+          numbered_lines = TRUE;
+          if (optarg)
+            getoptarg (optarg, 'n', &number_separator,
+                       &chars_per_number);
+          break;
+        case 'o':
+          chars_per_margin = atoi (optarg);
+          break;
+        case 'r':
+          ignore_failed_opens = TRUE;
+          break;
+        case 's':
+          use_column_separator = TRUE;
+          if (optarg)
+            {
+              s = optarg;
+              column_separator = *s;
+              if (*++s)
+                {
+                  fprintf (stderr, "\
+%s: extra characters in the argument to the `-s' option: `%s'\n",
+                           program_name, s);
+                  usage ((char *) 0);
+                }
+            }
+          break;
+        case 't':
+          extremities = FALSE;
+          break;
+        case 'v':
+          use_esc_sequence = TRUE;
+          break;
+        case 'w':
+          chars_per_line = atoi (optarg);
+          break;
+        case '?':
+          usage ((char *) 0);
+          break;
+        }
+
+      /* EOF is tested only here so that the code above still prints
+         any files queued before the end of the arguments.  */
+      if (c == EOF)
+        break;
+    }
+
+  if (input_is_stdin)
+    print_files (0, (char **) 0);
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    error (1, errno, "standard input");
+  if (ferror (stdout) || fclose (stdout) == EOF)
+    error (1, errno, "write error");
+  if (failed_opens > 0)
+    exit(1);
+  exit (0);
+}
+
+/* Decode the text attached to an option that takes an optional
+   `cNNN' argument.
+
+   ARG is the attached text and SWITCH_CHAR the option letter, used
+   only in the diagnostic.  If ARG does not begin with a digit, its
+   first character is stored in *CHARACTER.  If what follows begins
+   with a digit it is converted with atoi and stored in *NUMBER;
+   anything else is reported and usage () is called.  An output
+   location whose part is absent is left untouched.
+
+   Example: -nck, where 'n' is the option, c is the optional number
+   separator, and k is the optional width of the field used when
+   printing a number.  */
+
+void
+getoptarg (arg, switch_char, character, number)
+     char *arg, switch_char, *character;
+     int *number;
+{
+  char *rest = arg;
+
+  if (!ISDIGIT (*rest))
+    {
+      *character = *rest;
+      ++rest;
+    }
+
+  /* Nothing after the optional character: keep the caller's number.  */
+  if (*rest == '\0')
+    return;
+
+  if (ISDIGIT (*rest))
+    *number = atoi (rest);
+  else
+    {
+      fprintf (stderr, "\
+%s: extra characters in the argument to the `-%c' option: `%s'\n",
+               program_name, switch_char, rest);
+      usage ((char *) 0);
+    }
+}
+
+/* Set parameters related to formatting.
+
+   NUMBER_OF_FILES is the number of files about to be printed together
+   (0 means standard input).  From the option variables this derives
+   lines_per_body, columns (when printing in parallel), truncate_lines,
+   the tab settings implied by multiple columns, number_width and
+   chars_per_column, and (re)allocates number_buff and clump_buff.
+   Fails through error () when the page is too narrow for the
+   requested columns.  */
+
+void
+init_parameters (number_of_files)
+     int number_of_files;
+{
+  int chars_used_by_number = 0;
+
+  lines_per_body = lines_per_page - lines_per_header - lines_per_footer;
+  if (lines_per_body <= 0)
+    extremities = FALSE;
+  if (extremities == FALSE)
+    lines_per_body = lines_per_page;
+
+  if (double_space)
+    lines_per_body = lines_per_body / 2;
+
+  /* If input is stdin, cannot print parallel files.  BSD dumps core
+     on this. */
+  if (number_of_files == 0)
+    parallel_files = FALSE;
+
+  if (parallel_files)
+    columns = number_of_files;
+
+  /* Tabification is assumed for multiple columns. */
+  if (columns > 1)
+    {
+      if (!use_column_separator)
+        truncate_lines = TRUE;
+
+      untabify_input = TRUE;
+      tabify_output = TRUE;
+    }
+  else
+    storing_columns = FALSE;
+
+  if (numbered_lines)
+    {
+      if (number_separator == input_tab_char)
+        {
+          number_width = chars_per_number +
+            tab_width (chars_per_input_tab,
+                       (chars_per_margin + chars_per_number));
+        }
+      else
+        number_width = chars_per_number + 1;
+      /* The number is part of the column width unless we are
+         printing files in parallel. */
+      if (parallel_files)
+        chars_used_by_number = number_width;
+    }
+
+  chars_per_column = (chars_per_line - chars_used_by_number -
+                      (columns - 1) * chars_per_gutter) / columns;
+
+  if (chars_per_column < 1)
+    error (1, 0, "page width too narrow");
+
+  if (numbered_lines)
+    {
+      /* number () formats with "%*d", which writes at least
+         chars_per_number characters but as many as the value needs
+         when it has more digits than the field is wide.  Size the
+         buffer for whichever is larger.  Three decimal digits per
+         byte of an int is an upper bound on its digits; one more
+         covers a sign.  */
+      int widest_int = 3 * sizeof (int) + 1;
+      int needed = (chars_per_number > widest_int
+                    ? chars_per_number : widest_int);
+
+      if (number_buff != (char *) 0)
+        free (number_buff);
+      /* One extra byte for the terminating NUL.  */
+      number_buff = (char *) xmalloc ((needed + 1) * sizeof (char));
+    }
+
+  /* Pick the maximum between the tab width and the width of an
+     escape sequence. */
+  if (clump_buff != (int *) 0)
+    free (clump_buff);
+  clump_buff = (int *) xmalloc ((chars_per_input_tab > 4
+                                 ? chars_per_input_tab : 4) * sizeof (int));
+}
+
+/* Open the necessary files,
+   maintaining a COLUMN structure for each column.
+
+   NUMBER_OF_FILES is the number of names in AV; 0 means read
+   standard input.
+
+   With multiple files, each column p has a different p->fp.
+   With single files, each column p has the same p->fp.
+   When printing in parallel, a file that cannot be opened gives up
+   its column, so `columns' shrinks by one for each failure.
+
+   Return 1 if (number_of_files > 0) and no files can be opened,
+   0 otherwise. */
+
+int
+init_fps (number_of_files, av)
+     int number_of_files;
+     char **av;
+{
+  int i, files_left;
+  COLUMN *p;
+  FILE *firstfp;
+  char *firstname;
+
+  total_files = 0;
+
+  if (column_vector != NULLCOL)
+    free ((char *) column_vector);
+  column_vector = (COLUMN *) xmalloc (columns * sizeof (COLUMN));
+
+  /* close_file tests lines_stored for every column, including ones
+     that store_columns never touches, so give it a defined value
+     rather than relying on whatever the fresh memory holds.  */
+  for (i = 0, p = column_vector; i < columns; ++i, ++p)
+    p->lines_stored = 0;
+
+  if (parallel_files)
+    {
+      files_left = number_of_files;
+      p = column_vector;
+      while (files_left--)
+        {
+          /* Advance to the next slot only on success, so a failed
+             open leaves its slot for the next file.  */
+          if (open_file (*av++, p))
+            ++p;
+          else
+            --columns;
+        }
+      if (columns == 0)
+        return 1;
+      init_header ("", -1);
+    }
+  else
+    {
+      p = column_vector;
+      if (number_of_files > 0)
+        {
+          if (open_file (*av, p) == 0)
+            return 1;
+          init_header (*av, fileno (p->fp));
+        }
+      else
+        {
+          p->name = "standard input";
+          p->fp = stdin;
+          have_read_stdin = TRUE;
+          p->status = OPEN;
+          ++total_files;
+          init_header ("", -1);
+        }
+
+      /* Every remaining column reads from the same stream.  */
+      firstname = p->name;
+      firstfp = p->fp;
+      for (i = columns - 1, ++p; i; --i, ++p)
+        {
+          p->name = firstname;
+          p->fp = firstfp;
+          p->status = OPEN;
+        }
+    }
+  files_ready_to_read = total_files;
+  return 0;
+}
+
+/* Choose, for every column, the routines used to emit characters
+   (char_func) and whole lines (print_func), decide whether the column
+   carries line numbers, and record the horizontal position at which
+   the column starts (start_position).
+
+   Columns other than the rightmost are stored first when
+   storing_columns is set (one file, columns filled downward);
+   otherwise they are read and printed directly (columns across, or
+   parallel files).  The rightmost column is stored only when the
+   columns are also to be balanced.
+
+   With a column separator every start position after the first is
+   ANYWHERE.  When numbering parallel files the first column is
+   widened by number_width.  */
+
+void
+init_funcs ()
+{
+  COLUMN *col = column_vector;
+  int index = 1;
+  int start = chars_per_margin;
+  /* Width of the column currently being set up.  Only the first one
+     can be wider than chars_per_column.  */
+  int width = chars_per_column;
+
+  if (parallel_files && numbered_lines)
+    width += number_width;
+
+  for (;;)
+    {
+      int rightmost = !(index < columns);
+      int stored = storing_columns && (!rightmost || balance_columns);
+
+      if (stored)
+        {
+          col->char_func = store_char;
+          col->print_func = print_stored;
+        }
+      else
+        {
+          col->char_func = print_char;
+          col->print_func = read_line;
+        }
+
+      /* Number only the first column when printing files in
+         parallel. */
+      col->numbered = numbered_lines && (!parallel_files || index == 1);
+      col->start_position = start;
+
+      if (rightmost)
+        break;
+
+      if (use_column_separator)
+        start = ANYWHERE;
+      else
+        start += width + chars_per_gutter;
+
+      width = chars_per_column;
+      ++col;
+      ++index;
+    }
+}
+
+/* Attach the file called NAME to column P.  The name "-" selects
+   standard input.
+
+   On success mark the column OPEN, count the file in total_files and
+   return nonzero.  On failure count it in failed_opens, report it
+   unless failed opens are being ignored, and return zero.  */
+
+int
+open_file (name, p)
+     char *name;
+     COLUMN *p;
+{
+  if (strcmp (name, "-") == 0)
+    {
+      p->name = "standard input";
+      p->fp = stdin;
+      have_read_stdin = 1;
+    }
+  else
+    {
+      p->name = name;
+      p->fp = fopen (name, "r");
+    }
+
+  if (p->fp != NULL)
+    {
+      p->status = OPEN;
+      ++total_files;
+      return 1;
+    }
+
+  ++failed_opens;
+  if (!ignore_failed_opens)
+    error (0, errno, "%s", name);
+  return 0;
+}
+
+/* Close the file read by column P; do nothing if P is already
+   CLOSED.  A read error on the stream, or a failing fclose, is fatal.
+   Standard input is left open here.
+
+   When files are printed in parallel only P is marked CLOSED.
+   Otherwise all columns share the stream, so every column is marked
+   CLOSED, and those with no stored lines get nothing more to print.
+   cols_ready_to_print is not adjusted here.  */
+
+void
+close_file (p)
+     COLUMN *p;
+{
+  int i;
+
+  if (p->status == CLOSED)
+    return;
+
+  if (ferror (p->fp))
+    error (1, errno, "%s", p->name);
+  if (p->fp != stdin && fclose (p->fp) == EOF)
+    error (1, errno, "%s", p->name);
+
+  if (parallel_files)
+    {
+      p->status = CLOSED;
+      p->lines_to_print = 0;
+    }
+  else
+    {
+      for (i = 0; i < columns; ++i)
+        {
+          COLUMN *q = &column_vector[i];
+
+          q->status = CLOSED;
+          if (q->lines_stored == 0)
+            q->lines_to_print = 0;
+        }
+    }
+
+  --files_ready_to_read;
+}
+
+/* A form feed was read from column P's file: suspend reading from it
+   until the next page begins.
+
+   Parallel files are suspended one at a time.  Otherwise all columns
+   read the same file, so every column is put ON_HOLD.  In both cases
+   P has nothing more to print on this page and one fewer file is
+   ready to read.  */
+
+void
+hold_file (p)
+     COLUMN *p;
+{
+  if (parallel_files)
+    p->status = ON_HOLD;
+  else
+    {
+      int i;
+
+      for (i = 0; i < columns; ++i)
+        column_vector[i].status = ON_HOLD;
+    }
+
+  p->lines_to_print = 0;
+  --files_ready_to_read;
+}
+
+/* Reverse the effect of hold_file at the end of a page: every column
+   that is ON_HOLD becomes OPEN again and is counted once more in
+   files_ready_to_read.  */
+
+void
+reset_status ()
+{
+  int i;
+
+  for (i = 0; i < columns; ++i)
+    {
+      COLUMN *col = &column_vector[i];
+
+      if (col->status != ON_HOLD)
+        continue;
+
+      col->status = OPEN;
+      files_ready_to_read++;
+    }
+}
+
+/* Print one file, or NUMBER_OF_FILES files side by side; AV holds
+   their names.  NUMBER_OF_FILES of 0 means standard input.
+
+   The steps, in order:
+     derive the formatting parameters,
+     open the files and set up the column list (give up quietly if
+       nothing could be opened),
+     allocate column storage when columns are stored,
+     skip ahead to first_page_number if leading pages are unwanted
+       (give up if the input ends first),
+     choose the store/print routines for each column,
+     print pages until print_page reports there are no more.  */
+
+void
+print_files (number_of_files, av)
+     int number_of_files;
+     char **av;
+{
+  init_parameters (number_of_files);
+
+  if (init_fps (number_of_files, av) != 0)
+    return;
+
+  if (storing_columns)
+    init_store_cols ();
+
+  if (first_page_number <= 1)
+    page_number = 1;
+  else
+    {
+      if (!skip_to_page (first_page_number))
+        return;
+      page_number = first_page_number;
+    }
+
+  init_funcs ();
+
+  line_number = 1;
+  for (;;)
+    {
+      if (!print_page ())
+        break;
+    }
+}
+
+/* Generous estimate of number of characters taken up by "Jun 7 00:08 " and
+   "Page NNNNN". */
+#define CHARS_FOR_DATE_AND_PAGE 50
+
+/* Initialize header information.
+   If DESC is non-negative, it is a file descriptor open to
+   FILENAME for reading.  FILENAME may be null, which is treated
+   as an empty name.
+
+   Allocate space for a header string,
+   Determine the time, insert file name or user-specified string.
+   The time shown is the file's modification time, or the current
+   time when DESC is negative, FILENAME is "-", or fstat fails.
+
+   It might be nice to have a "blank headers" option, since
+   pr -h "" still prints the date and page number. */
+
+void
+init_header (filename, desc)
+     char *filename;
+     int desc;
+{
+  int chars_per_header;
+  char *f = filename;
+  char *t, *middle;
+  struct stat st;
+
+  if (filename == 0)
+    f = "";
+
+  /* If parallel files or standard input, use current time.  Compare
+     F, not FILENAME, so that a null FILENAME is never dereferenced.  */
+  if (desc < 0 || !strcmp (f, "-") || fstat (desc, &st))
+    st.st_mtime = time ((time_t *) 0);
+  t = ctime (&st.st_mtime);
+
+  /* ctime yields "Www Mmm dd hh:mm:ss yyyy\n".  Cutting at index 16
+     drops the seconds and cutting at 24 drops the newline, leaving
+     "Mmm dd hh:mm" at t + 4 and the year at t + 20.  */
+  t[16] = '\0';                 /* Mark end of month and time string. */
+  t[24] = '\0';                 /* Mark end of year string. */
+
+  middle = standard_header ? f : custom_header;
+
+  chars_per_header = strlen (middle) + CHARS_FOR_DATE_AND_PAGE + 1;
+  if (header != (char *) 0)
+    free (header);
+  header = (char *) xmalloc (chars_per_header * sizeof (char));
+
+  sprintf (header, "%s %s %s Page", &t[4], &t[20], middle);
+}
+
+/* Prepare the columns for printing one page.
+
+   When columns are stored, first read the page's lines into the
+   buffer.  Then give each column its lines_to_print:
+     a stored column gets the number of lines stored for it;
+     a column read straight from its file gets lines_per_body if the
+       file is OPEN and 0 otherwise.
+   The rightmost column counts as stored only when columns are being
+   balanced.
+
+   cols_ready_to_print ends up as the number of columns that have
+   something to print, which is how print_page knows when to stop.  */
+
+void
+init_page ()
+{
+  int idx;
+  COLUMN *p;
+
+  cols_ready_to_print = 0;
+
+  if (storing_columns)
+    store_columns ();
+
+  for (idx = 0, p = column_vector; idx < columns; ++idx, ++p)
+    {
+      int rightmost = (idx == columns - 1);
+
+      if (storing_columns && (!rightmost || balance_columns))
+        {
+          p->lines_to_print = p->lines_stored;
+          if (p->lines_to_print != 0)
+            ++cols_ready_to_print;
+        }
+      else if (p->status == OPEN)
+        {
+          /* Read directly from the file.  */
+          p->lines_to_print = lines_per_body;
+          ++cols_ready_to_print;
+        }
+      else
+        p->lines_to_print = 0;
+    }
+}
+
+/* Print one page.
+
+ As long as there are lines left on the page and columns ready to print,
+ Scan across the column list
+ if the column has stored lines or the file is open
+ pad to the appropriate spot
+ print the column
+ pad the remainder of the page with \n or \f as requested
+ reset the status of all files -- any files which were on hold because
+ of formfeeds are now put back into the lineup.
+
+ Return FALSE, printing nothing, when init_page finds no column ready
+ to print; return TRUE otherwise. */
+
+int
+print_page ()
+{
+ int j;
+ int lines_left_on_page;
+ COLUMN *p;
+
+ /* Used as an accumulator (with | operator) of successive values of
+ pad_vertically. The trick is to set pad_vertically
+ to zero before each run through the inner loop, then after that
+ loop, it tells us whether a line was actually printed (whether a
+ newline needs to be output -- or two for double spacing). But those
+ values have to be accumulated (in pv) so we can invoke pad_down
+ properly after the outer loop completes. */
+ int pv;
+
+ init_page ();
+
+ if (cols_ready_to_print == 0)
+ return FALSE; /* Nothing left to print: no page is produced. */
+
+ if (extremities)
+ print_a_header = TRUE; /* The header is printed lazily, with the first line. */
+
+ /* Don't pad unless we know a page was printed. */
+ pad_vertically = FALSE;
+ pv = FALSE;
+
+ lines_left_on_page = lines_per_body;
+ if (double_space)
+ lines_left_on_page *= 2; /* init_parameters halved lines_per_body for double spacing. */
+
+ while (lines_left_on_page > 0 && cols_ready_to_print > 0)
+ {
+ output_position = 0;
+ spaces_not_printed = 0;
+ separators_not_printed = 0;
+ pad_vertically = FALSE;
+
+ for (j = 1, p = column_vector; j <= columns; ++j, ++p)
+ {
+ input_position = 0;
+ if (p->lines_to_print > 0)
+ {
+ padding_not_printed = p->start_position;
+
+ if (!(p->print_func) (p)) /* A zero return means the line was cut short. */
+ read_rest_of_line (p); /* Discard the part that did not fit. */
+ pv |= pad_vertically;
+
+ if (use_column_separator)
+ ++separators_not_printed;
+
+ if (--p->lines_to_print <= 0 && --cols_ready_to_print <= 0)
+ break; /* The last column that was still ready has run out. */
+ }
+ }
+
+ if (pad_vertically)
+ {
+ putchar ('\n');
+ --lines_left_on_page;
+ }
+
+ if (double_space && pv && extremities)
+ {
+ putchar ('\n');
+ --lines_left_on_page;
+ }
+ }
+
+ pad_vertically = pv;
+
+ if (pad_vertically && extremities)
+ pad_down (lines_left_on_page + lines_per_footer);
+
+ reset_status (); /* Change ON_HOLD to OPEN. */
+
+ return TRUE; /* More pages to go. */
+}
+
+/* Allocate space for storing columns.
+
+   This is necessary when printing multiple columns from a single file.
+   Lines are stored consecutively in buff, separated by '\0'.
+   (We can't use a fixed offset since with the '-s' flag lines aren't
+   truncated.)
+
+   We maintain a list (line_vector) of pointers to the beginnings
+   of lines in buff.  We allocate one more than the number of lines
+   because the last entry tells us the index of the last character,
+   which we need to know in order to print the last line in buff.
+
+   end_vector has one entry per line; store_columns records there the
+   input position reached at the end of each stored line.
+
+   Any vectors left from an earlier call are freed first.  buff starts
+   out big enough for a page of truncated lines (twice that when
+   lines are not truncated) and is grown by store_char on demand.  */
+
+void
+init_store_cols ()
+{
+  int total_lines = lines_per_body * columns;
+  int chars_if_truncate = total_lines * (chars_per_column + 1);
+
+  /* Both vectors hold `int' indices into buff, so size their elements
+     from the vectors themselves rather than from a pointer type.  */
+  if (line_vector != (int *) 0)
+    free ((int *) line_vector);
+  line_vector = (int *) xmalloc ((total_lines + 1) * sizeof *line_vector);
+
+  if (end_vector != (int *) 0)
+    free ((int *) end_vector);
+  end_vector = (int *) xmalloc (total_lines * sizeof *end_vector);
+
+  if (buff != (char *) 0)
+    free (buff);
+  buff_allocated = use_column_separator ? 2 * chars_if_truncate
+    : chars_if_truncate;        /* Tune this. */
+  buff = (char *) xmalloc (buff_allocated * sizeof (char));
+}
+
+/* Store all but the rightmost column, or every column when the
+   columns are to be balanced.
+   (Used when printing a single file in multiple downward columns)
+
+   For each column
+     set p->current_line to be the index in line_vector of the
+       first line in the column
+     For each line in the column
+       store the line in buff
+       add to line_vector the index of the line's first char
+       add to end_vector the input position reached by the line
+   buff_start is the index in buff of the first character in the
+   current line.
+
+   Reading stops early once no file is ready to read (end of file or
+   a form feed).  When balancing, the stored lines are then dealt out
+   evenly over all the columns.  */
+
+void
+store_columns ()
+{
+  int i, j;
+  int line = 0;
+  int buff_start;
+  int last_col;         /* The rightmost column which will be saved in buff */
+  COLUMN *p;
+
+  buff_current = 0;
+  buff_start = 0;
+
+  if (balance_columns)
+    last_col = columns;
+  else
+    last_col = columns - 1;
+
+  for (i = 1, p = column_vector; i <= last_col; ++i, ++p)
+    p->lines_stored = 0;
+
+  for (i = 1, p = column_vector; i <= last_col && files_ready_to_read;
+       ++i, ++p)
+    {
+      p->current_line = line;
+      for (j = lines_per_body; j && files_ready_to_read; --j)
+
+        if (p->status == OPEN)  /* Redundant.  Clean up. */
+          {
+            input_position = 0;
+
+            /* read_line takes the column only.  */
+            if (!read_line (p))
+              read_rest_of_line (p);
+
+            /* Count the line unless the read hit end of file or a
+               form feed without storing a single character.  */
+            if (p->status == OPEN
+                || buff_start != buff_current)
+              {
+                ++p->lines_stored;
+                line_vector[line] = buff_start;
+                end_vector[line++] = input_position;
+                buff_start = buff_current;
+              }
+          }
+    }
+
+  /* Keep track of the location of the last char in buff. */
+  line_vector[line] = buff_start;
+
+  /* P now points past the last column processed, possibly past the
+     end of column_vector, so it must not be examined here.  Balancing
+     a full page reproduces the layout just stored, so balance can
+     simply be called every time.  */
+  if (balance_columns)
+    balance (line);
+}
+
+/* Deal TOTAL_STORED stored lines out over all the columns as evenly
+   as possible.  Each column gets TOTAL_STORED / columns lines, and
+   the leftmost TOTAL_STORED % columns columns get one more.  Each
+   column's current_line is set to the index of its first line.  */
+
+void
+balance (total_stored)
+     int total_stored;
+{
+  int next_first = 0;
+  int idx;
+
+  for (idx = 0; idx < columns; ++idx)
+    {
+      COLUMN *col = &column_vector[idx];
+      int share = total_stored / columns;
+
+      if (idx < total_stored % columns)
+        ++share;
+
+      col->lines_stored = share;
+      col->current_line = next_first;
+      next_first += share;
+    }
+}
+
+/* Append character C to buff at buff_current, doubling the size of
+   buff first when it is full.  */
+
+void
+store_char (c)
+     int c;
+{
+  if (buff_current >= buff_allocated)
+    {
+      /* May be too generous. */
+      buff_allocated *= 2;
+      buff = (char *) xrealloc (buff, buff_allocated * sizeof (char));
+    }
+
+  buff[buff_current] = (char) c;
+  ++buff_current;
+}
+
+/* Emit the next line number through column P's char_func, then
+   advance line_number.
+
+   The number is formatted right-justified in chars_per_number
+   characters and exactly that many characters are emitted.  It is
+   followed by number_separator, or, when the separator is the input
+   tab character, by enough spaces to fill number_width.  When lines
+   are truncated and files are not printed in parallel, the width of
+   the number counts against the column by advancing input_position.  */
+
+void
+number (p)
+     COLUMN *p;
+{
+  int k;
+
+  sprintf (number_buff, "%*d", chars_per_number, line_number);
+  ++line_number;
+
+  for (k = 0; k < chars_per_number; ++k)
+    (p->char_func) ((int) number_buff[k]);
+
+  if (number_separator != input_tab_char)
+    (p->char_func) ((int) number_separator);
+  else
+    {
+      for (k = number_width - chars_per_number; k > 0; --k)
+        (p->char_func) ((int) ' ');
+    }
+
+  if (truncate_lines && !parallel_files)
+    input_position += number_width;
+}
+
+/* Arrange for the output to continue at horizontal position POSITION.
+
+   When output is being tabified, only record the distance in
+   spaces_not_printed; print_white_space emits it later.  Otherwise
+   print the spaces now and update output_position.  */
+
+void
+pad_across_to (position)
+     int position;
+{
+  int col;
+
+  if (tabify_output)
+    {
+      spaces_not_printed = position - output_position;
+      return;
+    }
+
+  for (col = output_position; col < position; ++col)
+    putchar (' ');
+  output_position = position;
+}
+
+/* Finish the page.
+
+   A single form feed is written if the user asked for form feeds;
+   otherwise LINES newlines are written.  */
+
+void
+pad_down (lines)
+     int lines;
+{
+  if (use_form_feed)
+    {
+      putchar ('\f');
+      return;
+    }
+
+  while (lines != 0)
+    {
+      putchar ('\n');
+      --lines;
+    }
+}
+
+/* Discard input from column P's file up to and including the next
+   newline.  Used after a line has been truncated, when its remaining
+   characters are neither printed nor stored.
+
+   A form feed puts the file on hold and end of file closes it; both
+   also end the scan.  */
+
+void
+read_rest_of_line (p)
+     COLUMN *p;
+{
+  FILE *stream = p->fp;
+
+  for (;;)
+    {
+      int ch = getc (stream);
+
+      if (ch == '\n')
+        return;
+
+      if (ch == '\f')
+        {
+          hold_file (p);
+          return;
+        }
+
+      if (ch == EOF)
+        {
+          close_file (p);
+          return;
+        }
+    }
+}
+
+/* Emit the white space that print_char and pad_across_to have been
+   saving up in spaces_not_printed (used when tabifying output).
+
+   Output tab characters are used for as long as more than one column
+   remains and the next tab stop does not overshoot the goal; single
+   spaces cover whatever is left.  Afterwards output_position is at
+   the goal and nothing is pending.  */
+
+void
+print_white_space ()
+{
+  int pos = output_position;
+  int target = pos + spaces_not_printed;
+
+  while (target - pos > 1)
+    {
+      int next = pos_after_tab (chars_per_output_tab, pos);
+
+      if (next > target)
+        break;
+      putchar (output_tab_char);
+      pos = next;
+    }
+
+  for (; pos < target; ++pos)
+    putchar (' ');
+
+  output_position = target;
+  spaces_not_printed = 0;
+}
+
+/* Emit the column separators that are owed.
+
+   Separators are only counted (in separators_not_printed) until it is
+   certain that a line will follow them; this routine then prints one
+   column_separator per count and brings the count back to zero.  */
+
+void
+print_separators ()
+{
+  while (separators_not_printed > 0)
+    {
+      print_char (column_separator);
+      --separators_not_printed;
+    }
+}
+
+/* Hand the N characters starting at CLUMP, in order, to column P's
+   char_func, which either prints or stores them.  */
+
+void
+print_clump (p, n, clump)
+     COLUMN *p;
+     int n;
+     int *clump;
+{
+  for (; n != 0; --n, ++clump)
+    (p->char_func) (*clump);
+}
+
+/* Write character C to standard output.
+
+   When tabifying, spaces are not written at once: they are counted in
+   spaces_not_printed, and the first character that is not a space
+   triggers print_white_space to emit the best mix of tabs and spaces
+   before it.  output_position is tracked as well: a printable
+   character advances it, a backspace moves it back, and any other
+   nonprintable character is taken to have no width.  */
+
+void
+print_char (c)
+     int c;
+{
+  if (tabify_output)
+    {
+      if (c == ' ')
+        {
+          ++spaces_not_printed;
+          return;
+        }
+
+      if (spaces_not_printed > 0)
+        print_white_space ();
+
+      if (ISPRINT (c))
+        ++output_position;
+      else if (c == '\b')
+        --output_position;
+    }
+
+  putchar (c);
+}
+
+/* Skip to page PAGE before printing, by reading and discarding the
+   lines of the pages before it.
+
+   Only columns whose file is OPEN are read.  A file that has reached
+   end of file has been closed and must not be read again, and a file
+   put on hold by a form feed contributes nothing more to the page
+   being skipped; it is reopened by reset_status at the end of that
+   page.
+
+   Return nonzero if some file still has input once the leading pages
+   have been skipped, zero if the input ran out first.  */
+
+int
+skip_to_page (page)
+     int page;
+{
+  int n, i, j;
+  COLUMN *p;
+
+  for (n = 1; n < page; ++n)
+    {
+      for (i = 1; i <= lines_per_body; ++i)
+        {
+          for (j = 1, p = column_vector; j <= columns; ++j, ++p)
+            if (p->status == OPEN)
+              read_rest_of_line (p);
+        }
+      reset_status ();
+    }
+  return files_ready_to_read > 0;
+}
+
+/* Print the page header and advance page_number.
+
+   Two blank lines come first unless form feeds are in use (a form
+   feed is assumed to take up those two lines).  The header text
+   follows, indented by the left margin, then the page number and two
+   more blank lines.  Afterwards no header is pending and
+   output_position is back at zero.  */
+
+void
+print_header ()
+{
+  if (!use_form_feed)
+    fputs ("\n\n", stdout);
+
+  output_position = 0;
+  pad_across_to (chars_per_margin);
+  print_white_space ();
+
+  fprintf (stdout, "%s %d\n\n\n", header, page_number);
+  ++page_number;
+
+  print_a_header = FALSE;
+  output_position = 0;
+}
+
+/* Print (or store, if p->char_func is store_char()) a line.
+
+ Read a character to determine whether we have a line or not.
+ (We may hit EOF, \n, or \f)
+
+ Once we know we have a line,
+ set pad_vertically = TRUE, meaning it's safe
+ to pad down at the end of the page, since we do have a page.
+ print a header if needed.
+ pad across to padding_not_printed if needed.
+ print any separators which need to be printed.
+ print a line number if it needs to be printed.
+ (The first four of these steps are skipped when the line is being
+ stored rather than printed.)
+
+ Print the clump which corresponds to the first character.
+
+ Enter a loop and keep printing until an end of line condition
+ exists, or until we exceed chars_per_column.
+
+ Return FALSE if we exceed chars_per_column before reading
+ an end of line character, TRUE otherwise. In the FALSE case the
+ character that did not fit has been consumed but not emitted, and
+ input_position is restored to its value before that character. */
+
+int
+read_line (p)
+ COLUMN *p;
+{
+ register int c, chars;
+ int last_input_position;
+
+ c = getc (p->fp);
+
+ last_input_position = input_position;
+ switch (c)
+ {
+ case '\f':
+ hold_file (p);
+ return TRUE;
+ case EOF:
+ close_file (p);
+ return TRUE;
+ case '\n':
+ break; /* Empty line: chars stays unset, but it is not used on this path. */
+ default:
+ chars = char_to_clump (c); /* Also advances input_position. */
+ }
+
+ if (truncate_lines && input_position > chars_per_column)
+ {
+ input_position = last_input_position;
+ return FALSE;
+ }
+
+ if (p->char_func != store_char) /* Printing, not storing. */
+ {
+ pad_vertically = TRUE;
+
+ if (print_a_header)
+ print_header ();
+
+ if (padding_not_printed != ANYWHERE)
+ {
+ pad_across_to (padding_not_printed);
+ padding_not_printed = ANYWHERE;
+ }
+
+ if (use_column_separator)
+ print_separators ();
+ }
+
+ if (p->numbered)
+ number (p);
+
+ if (c == '\n')
+ return TRUE;
+
+ print_clump (p, chars, clump_buff);
+
+ for (;;)
+ {
+ c = getc (p->fp);
+
+ switch (c)
+ {
+ case '\n':
+ return TRUE;
+ case '\f':
+ hold_file (p);
+ return TRUE;
+ case EOF:
+ close_file (p);
+ return TRUE;
+ }
+
+ last_input_position = input_position;
+ chars = char_to_clump (c);
+ if (truncate_lines && input_position > chars_per_column)
+ {
+ input_position = last_input_position; /* Undo the width of the character that did not fit. */
+ return FALSE;
+ }
+
+ print_clump (p, chars, clump_buff);
+ }
+}
+
+/* Print column P's next stored line from buff and advance the
+   column's current_line.
+
+   Being called at all means there is something to print, so
+   pad_vertically is set, and a pending header, pending padding and
+   pending separators are dealt with first.  The line occupies buff
+   from its own line_vector entry up to the following one.  Unless
+   spaces are still pending afterwards, output_position is set from
+   the column's start position and the line's end_vector entry.
+
+   Always return TRUE, meaning there is no need to call
+   read_rest_of_line.  */
+
+int
+print_stored (p)
+     COLUMN *p;
+{
+  int line = p->current_line;
+  int idx = line_vector[line];
+  int stop = line_vector[line + 1];
+
+  p->current_line = line + 1;
+
+  pad_vertically = TRUE;
+
+  if (print_a_header)
+    print_header ();
+
+  if (padding_not_printed != ANYWHERE)
+    {
+      pad_across_to (padding_not_printed);
+      padding_not_printed = ANYWHERE;
+    }
+
+  if (use_column_separator)
+    print_separators ();
+
+  for (; idx != stop; ++idx)
+    print_char (buff[idx]);
+
+  if (spaces_not_printed == 0)
+    output_position = p->start_position + end_vector[line];
+
+  return TRUE;
+}
+
+/* Expand input character C into clump_buff, add the width of the
+   result to input_position, and return the number of characters
+   placed in clump_buff.
+
+   The input tab character becomes spaces up to the next tab stop
+   when input is untabified; otherwise it is kept as is but still
+   counts for the width of the tab.
+   A printable character is kept, with width 1.
+   A nonprintable character becomes
+     a backslash and three octal digits with -v, and with -c when
+       its code is 0200 or more;
+     `^' and the character with bit 0100 flipped with -c otherwise;
+     itself when neither option is given, with width -1 for a
+       backspace and 0 for anything else.
+
+   Note: the width of a clump is not necessarily equal to the number
+   of characters in clump_buff.  */
+
+int
+char_to_clump (c)
+     int c;
+{
+  int *out = clump_buff;
+  int width;
+  int chars;
+  int k;
+
+  if (c == input_tab_char)
+    {
+      width = tab_width (chars_per_input_tab, input_position);
+
+      if (untabify_input)
+        {
+          for (k = 0; k < width; ++k)
+            out[k] = ' ';
+          chars = width;
+        }
+      else
+        {
+          out[0] = c;
+          chars = 1;
+        }
+    }
+  else if (ISPRINT (c))
+    {
+      out[0] = c;
+      width = 1;
+      chars = 1;
+    }
+  else if (use_esc_sequence || (use_cntrl_prefix && !(c < 0200)))
+    {
+      char digits[4];
+
+      sprintf (digits, "%03o", c);
+      out[0] = '\\';
+      for (k = 0; k < 3; ++k)
+        out[k + 1] = (int) digits[k];
+      width = 4;
+      chars = 4;
+    }
+  else if (use_cntrl_prefix)
+    {
+      out[0] = '^';
+      out[1] = c ^ 0100;
+      width = 2;
+      chars = 2;
+    }
+  else
+    {
+      out[0] = c;
+      chars = 1;
+      width = (c == '\b') ? -1 : 0;
+    }
+
+  input_position += width;
+  return chars;
+}
+
+/* We've just printed some files and need to clean up things before
+   looking for more options and printing the next batch of files.
+
+   Free everything we've xmalloc'ed, except `header'.
+
+   Each pointer is cleared after it is freed.  The allocation
+   routines (init_parameters, init_fps, init_store_cols) free any
+   buffer whose pointer is still set before allocating a new one, so a
+   stale pointer left here would be freed a second time when the next
+   batch of files is printed.  */
+
+void
+cleanup ()
+{
+  if (number_buff)
+    {
+      free (number_buff);
+      number_buff = (char *) 0;
+    }
+  if (clump_buff)
+    {
+      free (clump_buff);
+      clump_buff = (int *) 0;
+    }
+  if (column_vector)
+    {
+      free (column_vector);
+      column_vector = NULLCOL;
+    }
+  if (line_vector)
+    {
+      free (line_vector);
+      line_vector = (int *) 0;
+    }
+  if (end_vector)
+    {
+      free (end_vector);
+      end_vector = (int *) 0;
+    }
+  if (buff)
+    {
+      free (buff);
+      buff = (char *) 0;
+    }
+}
+
+/* Complain, print a usage message, and die. */
+
+void
+usage (reason)
+ char *reason;
+{
+ if (reason)
+ fprintf (stderr, "%s: %s\n", program_name, reason);
+
+ fprintf (stderr, "\
+Usage: %s [+PAGE] [-COLUMN] [-abcdfFmrtv] [-e[in-tab-char[in-tab-width]]]\n\
+ [-h header] [-i[out-tab-char[out-tab-width]]] [-l page-length]\n\
+ [-n[number-separator[digits]]] [-o left-margin]\n\
+ [-s[column-separator]] [-w page-width] [file...]\n",
+ program_name);
+ exit (2);
+}
diff --git a/src/sort.c b/src/sort.c
new file mode 100644
index 000000000..de8b937e2
--- /dev/null
+++ b/src/sort.c
@@ -0,0 +1,1746 @@
+/* sort - sort lines of text (with all kinds of options).
+ Copyright (C) 1988, 1991 Free Software Foundation
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Written December 1988 by Mike Haertel.
+ The author may be reached (Email) at the address mike@ai.mit.edu,
+ or (US mail) as Mike Haertel c/o Free Software Foundation. */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <sys/types.h>
+#include <signal.h>
+#include <stdio.h>
+#include "system.h"
+#ifdef _POSIX_VERSION
+#include <limits.h>
+#else
+#ifndef UCHAR_MAX
+#define UCHAR_MAX 255
+#endif
+#endif
+#ifndef STDC_HEADERS
+char *malloc ();
+char *realloc ();
+void free ();
+#endif
+
+void error ();
+static void usage ();
+
+/* Smaller of A and B. Each argument may be evaluated twice, so do not
+ pass expressions with side effects. */
+#define min(a, b) ((a) < (b) ? (a) : (b))
+/* Number of distinct unsigned char values; the size of the lookup
+ tables below that are indexed by UCHAR (c). */
+#define UCHAR_LIM (UCHAR_MAX + 1)
+/* Convert C to unsigned char so it can be used as a table index. */
+#define UCHAR(c) ((unsigned char) (c))
+
+/* Character classification wrappers. When <ctype.h> provides isascii
+ as a macro, the classification is only applied to values for which
+ isascii is true; anything else is classified as false. */
+#ifdef isascii
+#define ISALNUM(c) (isascii(c) && isalnum(c))
+#define ISDIGIT(c) (isascii(c) && isdigit(c))
+#define ISPRINT(c) (isascii(c) && isprint(c))
+#define ISLOWER(c) (isascii(c) && islower(c))
+#else
+#define ISALNUM(c) isalnum(c)
+#define ISDIGIT(c) isdigit(c)
+#define ISPRINT(c) isprint(c)
+#define ISLOWER(c) islower(c)
+#endif
+
+/* The kind of blanks for '-b' to skip in various options. */
+enum blanktype { bl_start, bl_end, bl_both };
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Table of digits. */
+static int digits[UCHAR_LIM];
+
+/* Table of white space. */
+static int blanks[UCHAR_LIM];
+
+/* Table of non-printing characters. */
+static int nonprinting[UCHAR_LIM];
+
+/* Table of non-dictionary characters (not letters, digits, or blanks). */
+static int nondictionary[UCHAR_LIM];
+
+/* Translation table folding lower case to upper. */
+static char fold_toupper[UCHAR_LIM];
+
+/* Table mapping 3-letter month names to integers.
+ Alphabetic order allows binary search: getmonth compares with strcmp,
+ so the entries must stay sorted by name. Names are upper case
+ because getmonth folds its input through fold_toupper before
+ looking it up. */
+static struct month
+{
+ char *name;
+ int val;
+} monthtab[] =
+{
+ "APR", 4,
+ "AUG", 8,
+ "DEC", 12,
+ "FEB", 2,
+ "JAN", 1,
+ "JUL", 7,
+ "JUN", 6,
+ "MAR", 3,
+ "MAY", 5,
+ "NOV", 11,
+ "OCT", 10,
+ "SEP", 9
+};
+
+/* During the merge phase, the number of files to merge at once. */
+#define NMERGE 16
+
+/* Initial buffer size for in core sorting. Will not grow unless a
+ line longer than this is seen. */
+static int sortalloc = 524288;
+
+/* Initial buffer size for in core merge buffers. Bear in mind that
+ up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
+static int mergealloc = 16384;
+
+/* Guess of average line length. */
+static int linelength = 30;
+
+/* Maximum total size, in bytes, of the array(s) of struct line; callers
+ divide it by sizeof (struct line) to get a limit on the number of
+ elements. */
+#define LINEALLOC 262144
+
+/* Prefix for temporary file names. */
+static char *prefix;
+
+/* Flag to reverse the order of all comparisons. */
+static int reverse;
+
+/* Flag for stable sort. This turns off the last ditch bytewise
+ comparison of lines, and instead leaves lines in the same order
+ they were read if all keys compare equal. */
+static int stable;
+
+/* Tab character separating fields. If NUL, then fields are separated
+ by the empty string between a non-whitespace character and a whitespace
+ character. */
+static char tab;
+
+/* Flag to remove consecutive duplicate lines from the output.
+ Only the first of a sequence of equal lines will be output. */
+static int unique;
+
+/* Nonzero if any of the input files are the standard input. */
+static int have_read_stdin;
+
+/* Lines are held in core as counted strings. findlines fills in
+ keybeg and keylim only when at least one key has been specified;
+ keycompare recomputes the field bounds itself whenever either
+ line's keybeg is NULL. */
+struct line
+{
+ char *text; /* Text of the line. */
+ int length; /* Length not including final newline. */
+ char *keybeg; /* Start of first key. */
+ char *keylim; /* Limit of first key. */
+};
+
+/* Arrays of lines. findlines doubles ALLOC whenever the array is
+ full, and stops scanning a buffer once USED reaches LIMIT. */
+struct lines
+{
+ struct line *lines; /* Dynamically allocated array of lines. */
+ int used; /* Number of slots used. */
+ int alloc; /* Number of slots allocated. */
+ int limit; /* Max number of slots to allocate. */
+};
+
+/* Input buffers. LEFT is set by findlines to the number of trailing
+ bytes it did not turn into lines; fillbuf moves those bytes to the
+ front of BUF before reading more input. */
+struct buffer
+{
+ char *buf; /* Dynamically allocated buffer. */
+ int used; /* Number of bytes used. */
+ int alloc; /* Number of bytes allocated. */
+ int left; /* Number of bytes left after line parsing. */
+};
+
+/* Lists of key field comparisons to be tried. keyhead is a dummy
+ list head: the first real key is keyhead.next. A negative SWORD
+ or EWORD means that end of the field was not specified, in which
+ case the start or end of the line is used instead. */
+static struct keyfield
+{
+ int sword; /* Zero-origin 'word' to start at. */
+ int schar; /* Additional characters to skip. */
+ int skipsblanks; /* Skip leading white space at start. */
+ int eword; /* Zero-origin first word after field. */
+ int echar; /* Additional characters in field. */
+ int skipeblanks; /* Skip trailing white space at finish. */
+ int *ignore; /* Boolean array of characters to ignore. */
+ char *translate; /* Translation applied to characters. */
+ int numeric; /* Flag for numeric comparison. */
+ int month; /* Flag for comparison by month name. */
+ int reverse; /* Reverse the sense of comparison. */
+ struct keyfield *next; /* Next keyfield to try. */
+} keyhead;
+
+/* The list of temporary files. temphead is a dummy list head; tempname
+ pushes each new node immediately after it, so the list runs from the
+ newest file to the oldest. */
+static struct tempnode
+{
+ char *name; /* Heap-allocated file name. */
+ struct tempnode *next; /* Next (older) temporary file, or null. */
+} temphead;
+
+/* Unlink every file still recorded on the temporary file list.
+   The list itself is left untouched.  */
+
+static void
+cleanup ()
+{
+  struct tempnode *cur = temphead.next;
+
+  while (cur)
+    {
+      unlink (cur->name);
+      cur = cur->next;
+    }
+}
+
+/* Return a newly allocated block of N bytes.  If the allocation
+   fails, report it, remove the temporary files and exit with
+   status 2.  */
+
+char *
+xmalloc (n)
+     unsigned n;
+{
+  char *block = malloc (n);
+
+  if (!block)
+    {
+      error (0, 0, "virtual memory exhausted");
+      cleanup ();
+      exit (2);
+    }
+
+  return block;
+}
+
+/* Resize the allocated block P to N bytes and return the (possibly
+   moved) block.
+   A null P is treated as a fresh xmalloc of N bytes.
+   An N of 0 frees P and yields a null pointer.
+   On failure, report it, remove the temporary files and exit with
+   status 2.  */
+
+char *
+xrealloc (p, n)
+     char *p;
+     unsigned n;
+{
+  char *resized;
+
+  if (!p)
+    return xmalloc (n);
+
+  if (!n)
+    {
+      free (p);
+      return 0;
+    }
+
+  resized = realloc (p, n);
+  if (!resized)
+    {
+      error (0, 0, "virtual memory exhausted");
+      cleanup ();
+      exit (2);
+    }
+
+  return resized;
+}
+
+/* Open FILE in mode HOW and return the stream.  The name "-" stands
+   for standard input and is not opened; handing out stdin is recorded
+   in have_read_stdin.  If the file cannot be opened, report it, remove
+   the temporary files and exit with status 2.  */
+
+static FILE *
+xfopen (file, how)
+     char *file, *how;
+{
+  FILE *stream;
+
+  if (strcmp (file, "-") == 0)
+    stream = stdin;
+  else
+    stream = fopen (file, how);
+
+  if (!stream)
+    {
+      error (0, errno, "%s", file);
+      cleanup ();
+      exit (2);
+    }
+
+  if (stream == stdin)
+    have_read_stdin = 1;
+
+  return stream;
+}
+
+/* Finish with the stream FP.
+
+   stdin is never closed; its end-of-file and error indicators are
+   cleared so that it can be read again.
+   stdout is never closed either, but it is flushed, and a failed
+   flush is reported as a write error.
+   Any other stream is closed (which also flushes it); a failure is
+   reported.
+
+   On any failure, remove the temporary files and exit with status 2.  */
+
+static void
+xfclose (fp)
+     FILE *fp;
+{
+  if (fp == stdin)
+    {
+      /* Allow reading stdin from tty more than once.  No fflush here:
+	 flushing an input stream is undefined in ISO C.  */
+      clearerr (fp);
+    }
+  else if (fp == stdout)
+    {
+      /* Do not clear the error indicator: a failure to write the
+	 sorted output must not be lost.  */
+      if (fflush (fp) != 0)
+	{
+	  error (0, errno, "write error");
+	  cleanup ();
+	  exit (2);
+	}
+    }
+  else if (fclose (fp) != 0)
+    {
+      error (0, errno, "error closing file");
+      cleanup ();
+      exit (2);
+    }
+}
+
+/* Write NELEM items of SIZE bytes each from BUF to FP.  If fewer than
+   NELEM items get written, report a write error, remove the temporary
+   files and exit with status 2.  */
+
+static void
+xfwrite (buf, size, nelem, fp)
+     char *buf;
+     int size, nelem;
+     FILE *fp;
+{
+  if (fwrite (buf, size, nelem, fp) == nelem)
+    return;
+
+  error (0, errno, "write error");
+  cleanup ();
+  exit (2);
+}
+
+/* Return a name for a temporary file.
+
+   The name is PREFIX, a '/' if PREFIX is non-empty and does not
+   already end in one, then "sort" followed by the process id and a
+   per-process sequence number, each printed with at least 5 digits.
+   The name is heap-allocated and recorded at the front of the
+   temporary file list; zaptemp frees it.  */
+
+static char *
+tempname ()
+{
+  static int seq;
+  int len = strlen (prefix);
+  /* Upper bound on the characters `%5.5d' can produce for any int:
+     at most 3 decimal digits per byte, plus a sign.  This is never
+     less than the 5-digit minimum.  */
+  int intlen = sizeof (int) * 3 + 1;
+  /* PREFIX, an optional '/', "sort", two integers and the final NUL.
+     The old fixed `len + 16' overflowed as soon as the process id
+     needed more than 5 digits.  */
+  char *name = xmalloc (len + 1 + 4 + 2 * intlen + 1);
+  struct tempnode *node =
+    (struct tempnode *) xmalloc (sizeof (struct tempnode));
+
+  if (len && prefix[len - 1] != '/')
+    sprintf (name, "%s/sort%5.5d%5.5d", prefix, (int) getpid (), ++seq);
+  else
+    sprintf (name, "%ssort%5.5d%5.5d", prefix, (int) getpid (), ++seq);
+  node->name = name;
+  node->next = temphead.next;
+  temphead.next = node;
+  return name;
+}
+
+/* Look for NAME on the list of temporary files.  If it is there,
+   unlink the file, take its node off the list and free both the
+   stored name and the node.  Names that are not on the list are
+   ignored.  */
+
+static void
+zaptemp (name)
+     char *name;
+{
+  /* LINK always points at the pointer that leads to the node being
+     examined, so unlinking needs no separate "previous" node.  */
+  struct tempnode **link = &temphead.next;
+  struct tempnode *victim;
+
+  while (*link && strcmp (name, (*link)->name) != 0)
+    link = &(*link)->next;
+
+  victim = *link;
+  if (!victim)
+    return;
+
+  unlink (victim->name);
+  free (victim->name);
+  *link = victim->next;
+  free ((char *) victim);
+}
+
+/* Fill in the character class tables (blanks, digits, nonprinting,
+   nondictionary) and the fold_toupper translation table for every
+   unsigned char value.  Class table entries are only ever set to 1
+   here; entries that do not apply are left as they were.  */
+
+static void
+inittables ()
+{
+  int c = 0;
+
+  while (c < UCHAR_LIM)
+    {
+      int blank = isblank (c);
+
+      if (blank)
+	blanks[c] = 1;
+      if (ISDIGIT (c))
+	digits[c] = 1;
+      if (!ISPRINT (c))
+	nonprinting[c] = 1;
+      if (!ISALNUM (c) && !blank)
+	nondictionary[c] = 1;
+
+      fold_toupper[c] = ISLOWER (c) ? toupper (c) : c;
+
+      ++c;
+    }
+}
+
+/* Set up BUF as an empty buffer backed by ALLOC freshly allocated
+   bytes.  */
+
+static void
+initbuf (buf, alloc)
+     struct buffer *buf;
+     int alloc;
+{
+  buf->buf = xmalloc (alloc);
+  buf->alloc = alloc;
+  buf->left = 0;
+  buf->used = 0;
+}
+
+/* Fill BUF reading from FP, moving buf->left bytes from the end
+ of buf->buf to the beginning first. If EOF is reached and the
+ file wasn't terminated by a newline, supply one. Return a count
+ of bytes buffered. */
+
+static int
+fillbuf (buf, fp)
+ struct buffer *buf;
+ FILE *fp;
+{
+ int cc;
+
+ /* Slide the unparsed tail (the last buf->left bytes) down to the
+ start of the buffer; it becomes the initial contents. */
+ bcopy (buf->buf + buf->used - buf->left, buf->buf, buf->left);
+ buf->used = buf->left;
+
+ /* Read until the buffer contains at least one newline or FP is at
+ end of file. */
+ while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, '\n', buf->used)))
+ {
+ if (buf->used == buf->alloc)
+ {
+ /* The buffer is full and still holds no complete line:
+ double its size. */
+ buf->alloc *= 2;
+ buf->buf = xrealloc (buf->buf, buf->alloc);
+ }
+ cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
+ if (ferror (fp))
+ {
+ error (0, errno, "read error");
+ cleanup ();
+ exit (2);
+ }
+ buf->used += cc;
+ }
+
+ /* At end of file, terminate an unterminated last line, growing the
+ buffer first if there is no room for the extra byte. */
+ if (feof (fp) && buf->used && buf->buf[buf->used - 1] != '\n')
+ {
+ if (buf->used == buf->alloc)
+ {
+ buf->alloc *= 2;
+ buf->buf = xrealloc (buf->buf, buf->alloc);
+ }
+ buf->buf[buf->used++] = '\n';
+ }
+
+ return buf->used;
+}
+
+/* Set up LINES as an empty line table with room for ALLOC entries.
+   LIMIT is recorded as the largest number of entries the table may
+   ever hold.  */
+
+static void
+initlines (lines, alloc, limit)
+     struct lines *lines;
+     int alloc;
+     int limit;
+{
+  lines->lines = (struct line *) xmalloc (alloc * sizeof (struct line));
+  lines->alloc = alloc;
+  lines->limit = limit;
+  lines->used = 0;
+}
+
+/* Return a pointer to the first character of the field specified
+ by KEY in LINE. */
+
+static char *
+begfield (line, key)
+ struct line *line;
+ struct keyfield *key;
+{
+ register char *ptr = line->text, *lim = ptr + line->length;
+ register int sword = key->sword, schar = key->schar;
+
+ /* Skip SWORD whole fields. With an explicit separator character,
+ a field ends at the separator and the separator itself is
+ stepped over. */
+ if (tab)
+ while (ptr < lim && sword--)
+ {
+ while (ptr < lim && *ptr != tab)
+ ++ptr;
+ if (ptr < lim)
+ ++ptr;
+ }
+ else
+ /* Without one, a field is a run of blanks followed by a run of
+ non-blanks. */
+ while (ptr < lim && sword--)
+ {
+ while (ptr < lim && blanks[UCHAR (*ptr)])
+ ++ptr;
+ while (ptr < lim && !blanks[UCHAR (*ptr)])
+ ++ptr;
+ }
+
+ if (key->skipsblanks)
+ while (ptr < lim && blanks[UCHAR (*ptr)])
+ ++ptr;
+
+ /* Finally advance SCHAR more characters, never past the end of the
+ line. */
+ while (ptr < lim && schar--)
+ ++ptr;
+
+ return ptr;
+}
+
+/* Return the limit of (a pointer to the first character after) the field
+ in LINE specified by KEY. */
+
+static char *
+limfield (line, key)
+ struct line *line;
+ struct keyfield *key;
+{
+ register char *ptr = line->text, *lim = ptr + line->length;
+ register int eword = key->eword, echar = key->echar;
+
+ /* Skip EWORD whole fields. With an explicit separator character,
+ the separator that ends the last skipped field is stepped over
+ only when trailing blanks are to be skipped (skipeblanks);
+ separators after earlier fields are always stepped over. */
+ if (tab)
+ while (ptr < lim && eword--)
+ {
+ while (ptr < lim && *ptr != tab)
+ ++ptr;
+ if (ptr < lim && (eword || key->skipeblanks))
+ ++ptr;
+ }
+ else
+ /* Without one, a field is a run of blanks followed by a run of
+ non-blanks. */
+ while (ptr < lim && eword--)
+ {
+ while (ptr < lim && blanks[UCHAR (*ptr)])
+ ++ptr;
+ while (ptr < lim && !blanks[UCHAR (*ptr)])
+ ++ptr;
+ }
+
+ if (key->skipeblanks)
+ while (ptr < lim && blanks[UCHAR (*ptr)])
+ ++ptr;
+
+ /* Finally advance ECHAR more characters, never past the end of the
+ line. */
+ while (ptr < lim && echar--)
+ ++ptr;
+
+ return ptr;
+}
+
+/* Find the lines in BUF, storing pointers and lengths in LINES.
+ Also replace newlines with NULs. */
+
+static void
+findlines (buf, lines)
+ struct buffer *buf;
+ struct lines *lines;
+{
+ register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
+ struct keyfield *key = keyhead.next;
+
+ lines->used = 0;
+
+ /* Take one newline-terminated line per iteration, stopping at the
+ end of the buffered data, at an unterminated tail, or when the
+ line table has reached its limit. */
+ while (beg < lim && (ptr = memchr (beg, '\n', lim - beg))
+ && lines->used < lines->limit)
+ {
+ /* There are various places in the code that rely on a NUL
+ being at the end of in-core lines; NULs inside the lines
+ will not cause trouble, though. */
+ *ptr = '\0';
+
+ if (lines->used == lines->alloc)
+ {
+ /* Line table is full: double it. */
+ lines->alloc *= 2;
+ lines->lines = (struct line *)
+ xrealloc ((char *) lines->lines,
+ lines->alloc * sizeof (struct line));
+ }
+
+ lines->lines[lines->used].text = beg;
+ lines->lines[lines->used].length = ptr - beg;
+
+ /* Precompute the position of the first key for efficiency. */
+ if (key)
+ {
+ /* A negative eword means no end was given: the key runs to
+ the end of the line. */
+ if (key->eword >= 0)
+ lines->lines[lines->used].keylim =
+ limfield (&lines->lines[lines->used], key);
+ else
+ lines->lines[lines->used].keylim = ptr;
+
+ /* A negative sword means no start was given: the key starts
+ at the beginning of the line, after optional blanks. */
+ if (key->sword >= 0)
+ lines->lines[lines->used].keybeg =
+ begfield (&lines->lines[lines->used], key);
+ else
+ {
+ /* This scan has no explicit bound; it stops at the NUL
+ stored at *ptr above at the latest, provided
+ blanks['\0'] is zero. */
+ if (key->skipsblanks)
+ while (blanks[UCHAR (*beg)])
+ ++beg;
+ lines->lines[lines->used].keybeg = beg;
+ }
+ }
+
+ ++lines->used;
+ beg = ptr + 1;
+ }
+
+ /* Whatever was not turned into a line is left for the next fillbuf. */
+ buf->left = lim - beg;
+}
+
+/* Compare strings A and B containing decimal fractions < 1. Each string
+ should begin with a decimal point followed immediately by the digits
+ of the fraction. Strings not of this form are considered to be zero. */
+
+static int
+fraccompare (a, b)
+ register char *a, *b;
+{
+ register tmpa = UCHAR (*a), tmpb = UCHAR (*b);
+
+ /* Both operands have a fraction: compare digit by digit. */
+ if (tmpa == '.' && tmpb == '.')
+ {
+ /* Skip the common prefix of digits. */
+ do
+ tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
+ while (tmpa == tmpb && digits[tmpa]);
+ /* First differing digit decides. */
+ if (digits[tmpa] && digits[tmpb])
+ return tmpa - tmpb;
+ /* Only A has digits left: A is larger unless they are all
+ zeros. */
+ if (digits[tmpa])
+ {
+ while (tmpa == '0')
+ tmpa = UCHAR (*++a);
+ if (digits[tmpa])
+ return 1;
+ return 0;
+ }
+ /* Only B has digits left: B is larger unless they are all
+ zeros. */
+ if (digits[tmpb])
+ {
+ while (tmpb == '0')
+ tmpb = UCHAR (*++b);
+ if (digits[tmpb])
+ return -1;
+ return 0;
+ }
+ return 0;
+ }
+ /* Only A has a fraction: it is larger iff the fraction is nonzero. */
+ else if (tmpa == '.')
+ {
+ do
+ tmpa = UCHAR (*++a);
+ while (tmpa == '0');
+ if (digits[tmpa])
+ return 1;
+ return 0;
+ }
+ /* Only B has a fraction: it is larger iff the fraction is nonzero. */
+ else if (tmpb == '.')
+ {
+ do
+ tmpb = UCHAR (*++b);
+ while (tmpb == '0');
+ if (digits[tmpb])
+ return -1;
+ return 0;
+ }
+ return 0;
+}
+
+/* Compare strings A and B as numbers without explicitly converting them to
+ machine numbers. Comparatively slow for short strings, but asymptotically
+ hideously fast. */
+
+static int
+numcompare (a, b)
+ register char *a, *b;
+{
+ register int tmpa, tmpb, loga, logb, tmp;
+
+ tmpa = UCHAR (*a), tmpb = UCHAR (*b);
+
+ /* A is negative. */
+ if (tmpa == '-')
+ {
+ tmpa = UCHAR (*++a);
+ if (tmpb != '-')
+ {
+ /* Negative versus non-negative: A is smaller, but only
+ when a digit follows A's sign and B starts with a
+ digit; otherwise the two compare equal. */
+ if (digits[tmpa] && digits[tmpb])
+ return -1;
+ return 0;
+ }
+ tmpb = UCHAR (*++b);
+
+ /* Both negative: same procedure as the non-negative case
+ below, with the sense of every result reversed. */
+ while (tmpa == '0')
+ tmpa = UCHAR (*++a);
+ while (tmpb == '0')
+ tmpb = UCHAR (*++b);
+
+ while (tmpa == tmpb && digits[tmpa])
+ tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
+
+ if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
+ return -fraccompare (a, b);
+
+ if (digits[tmpa])
+ for (loga = 1; digits[UCHAR (*++a)]; ++loga)
+ ;
+ else
+ loga = 0;
+
+ if (digits[tmpb])
+ for (logb = 1; digits[UCHAR (*++b)]; ++logb)
+ ;
+ else
+ logb = 0;
+
+ if (tmp = logb - loga)
+ return tmp;
+
+ if (!loga)
+ return 0;
+
+ return tmpb - tmpa;
+ }
+ /* Only B is negative. */
+ else if (tmpb == '-')
+ {
+ if (digits[UCHAR (tmpa)] && digits[UCHAR (*++b)])
+ return 1;
+ return 0;
+ }
+ /* Neither is negative. */
+ else
+ {
+ /* Leading zeros do not affect the value. */
+ while (tmpa == '0')
+ tmpa = UCHAR (*++a);
+ while (tmpb == '0')
+ tmpb = UCHAR (*++b);
+
+ /* Skip the common prefix of digits. */
+ while (tmpa == tmpb && digits[tmpa])
+ tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
+
+ /* The integer parts are equal when one side is at its decimal
+ point and the other has no digits left: the fractions
+ decide. */
+ if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
+ return fraccompare (a, b);
+
+ /* Count the integer digits remaining on each side, starting
+ with the first differing one. */
+ if (digits[tmpa])
+ for (loga = 1; digits[UCHAR (*++a)]; ++loga)
+ ;
+ else
+ loga = 0;
+
+ if (digits[tmpb])
+ for (logb = 1; digits[UCHAR (*++b)]; ++logb)
+ ;
+ else
+ logb = 0;
+
+ /* More remaining digits means a larger number. */
+ if (tmp = loga - logb)
+ return tmp;
+
+ if (!loga)
+ return 0;
+
+ /* Same number of digits: the first differing digit decides. */
+ return tmpa - tmpb;
+ }
+}
+
+/* Look up the month whose three-letter abbreviation begins the
+   LEN-byte string S, after folding it through fold_toupper.  Return
+   the month's number from monthtab, or 0 if LEN is less than 3 or
+   the abbreviation is not in the table.  */
+
+static int
+getmonth (s, len)
+     char *s;
+     int len;
+{
+  char abbrev[4];
+  register int k, low, high, mid;
+
+  if (len < 3)
+    return 0;
+
+  for (k = 0; k < 3; ++k)
+    abbrev[k] = fold_toupper[UCHAR (s[k])];
+  abbrev[3] = '\0';
+
+  /* Binary search: the answer, if any, is always in [low, high).  */
+  low = 0;
+  high = 12;
+  while (high - low > 1)
+    {
+      mid = (low + high) / 2;
+      if (strcmp (abbrev, monthtab[mid].name) < 0)
+	high = mid;
+      else
+	low = mid;
+    }
+
+  return strcmp (abbrev, monthtab[low].name) == 0 ? monthtab[low].val : 0;
+}
+
+/* Compare two lines A and B trying every key in sequence until there
+ are no more keys or a difference is found. */
+
+static int
+keycompare (a, b)
+ struct line *a, *b;
+{
+ register char *texta, *textb, *lima, *limb, *translate;
+ register int *ignore;
+ struct keyfield *key;
+ int diff = 0, iter = 0, lena, lenb;
+
+ for (key = keyhead.next; key; key = key->next, ++iter)
+ {
+ ignore = key->ignore;
+ translate = key->translate;
+
+ /* Find the beginning and limit of each field. */
+ if (iter || a->keybeg == NULL || b->keybeg == NULL)
+ {
+ /* Negative eword/sword mean the end/start of the key was not
+ given, so the end/start of the line is used. */
+ if (key->eword >= 0)
+ lima = limfield (a, key), limb = limfield (b, key);
+ else
+ lima = a->text + a->length, limb = b->text + b->length;
+
+ if (key->sword >= 0)
+ texta = begfield (a, key), textb = begfield (b, key);
+ else
+ {
+ texta = a->text, textb = b->text;
+ if (key->skipsblanks)
+ {
+ while (texta < lima && blanks[UCHAR (*texta)])
+ ++texta;
+ while (textb < limb && blanks[UCHAR (*textb)])
+ ++textb;
+ }
+ }
+ }
+ else
+ {
+ /* For the first iteration only, the key positions have
+ been precomputed for us. */
+ texta = a->keybeg, lima = a->keylim;
+ textb = b->keybeg, limb = b->keylim;
+ }
+
+ /* Find the lengths. */
+ lena = lima - texta, lenb = limb - textb;
+ if (lena < 0)
+ lena = 0;
+ if (lenb < 0)
+ lenb = 0;
+
+ /* Actually compare the fields. */
+ if (key->numeric)
+ {
+ /* numcompare works on NUL-terminated strings, so when a
+ field does not already end at a NUL, temporarily store
+ one at each limit and restore the bytes afterwards. */
+ if (*lima || *limb)
+ {
+ char savea = *lima, saveb = *limb;
+
+ *lima = *limb = '\0';
+ diff = numcompare (texta, textb);
+ *lima = savea, *limb = saveb;
+ }
+ else
+ diff = numcompare (texta, textb);
+
+ if (diff)
+ return key->reverse ? -diff : diff;
+ continue;
+ }
+ else if (key->month)
+ {
+ diff = getmonth (texta, lena) - getmonth (textb, lenb);
+ if (diff)
+ return key->reverse ? -diff : diff;
+ continue;
+ }
+ /* NOTE(review): the three character-by-character branches below
+ subtract values of type char, whereas the final branch uses
+ memcmp; where plain char is signed the two may order bytes
+ above 127 differently -- confirm this is intended. */
+ else if (ignore && translate)
+ while (texta < lima && textb < limb)
+ {
+ while (texta < lima && ignore[UCHAR (*texta)])
+ ++texta;
+ while (textb < limb && ignore[UCHAR (*textb)])
+ ++textb;
+ if (texta < lima && textb < limb &&
+ translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
+ {
+ diff = translate[UCHAR (*--texta)] - translate[UCHAR (*--textb)];
+ break;
+ }
+ }
+ else if (ignore)
+ while (texta < lima && textb < limb)
+ {
+ while (texta < lima && ignore[UCHAR (*texta)])
+ ++texta;
+ while (textb < limb && ignore[UCHAR (*textb)])
+ ++textb;
+ if (texta < lima && textb < limb && *texta++ != *textb++)
+ {
+ diff = *--texta - *--textb;
+ break;
+ }
+ }
+ else if (translate)
+ while (texta < lima && textb < limb)
+ {
+ if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
+ {
+ diff = translate[UCHAR (*--texta)] - translate[UCHAR (*--textb)];
+ break;
+ }
+ }
+ else
+ diff = memcmp (texta, textb, min (lena, lenb));
+
+ if (diff)
+ return key->reverse ? -diff : diff;
+ /* No differing character was found: the field lengths computed
+ above (which include any ignored characters) decide. */
+ if (diff = lena - lenb)
+ return key->reverse ? -diff : diff;
+ }
+
+ return 0;
+}
+
+/* Compare two lines A and B, returning negative, zero, or positive
+   depending on whether A compares less than, equal to, or greater than B.
+
+   When keys have been specified they are tried first (keycompare).  If
+   they all compare equal, and neither the unique nor the stable flag
+   is set, the whole lines are compared bytewise as a last resort.
+   Without keys the whole lines are always compared bytewise.
+
+   Unless keycompare found a difference, the result is negated when
+   the global reverse flag is set.  */
+
+static int
+compare (a, b)
+     register struct line *a, *b;
+{
+  int diff, tmpa, tmpb, mini;
+
+  if (keyhead.next)
+    {
+      diff = keycompare (a, b);
+      if (diff)
+	return diff;
+      if (!unique && !stable)
+	{
+	  tmpa = a->length, tmpb = b->length;
+	  diff = memcmp (a->text, b->text, min (tmpa, tmpb));
+	  if (!diff)
+	    diff = tmpa - tmpb;
+	}
+    }
+  else
+    {
+      tmpa = a->length, tmpb = b->length;
+      mini = min (tmpa, tmpb);
+      if (mini == 0)
+	diff = tmpa - tmpb;
+      else
+	{
+	  char *ap = a->text, *bp = b->text;
+
+	  /* Shortcut: most comparisons are settled by the first byte.
+	     The bytes must be compared as unsigned char, exactly as
+	     memcmp does; subtracting plain char values would order
+	     bytes above 127 differently from memcmp wherever char is
+	     signed.  */
+	  diff = UCHAR (*ap) - UCHAR (*bp);
+	  if (diff == 0)
+	    {
+	      diff = memcmp (ap, bp, mini);
+	      if (diff == 0)
+		diff = tmpa - tmpb;
+	    }
+	}
+    }
+
+  return reverse ? -diff : diff;
+}
+
+/* Check that the lines read from the given FP come in order.  Return
+   1 if they do and 0 if there is a disorder.  When the unique flag is
+   set, two adjacent lines that compare equal also count as a
+   disorder.  FP is finished with xfclose before returning.  */
+
+static int
+checkfp (fp)
+     FILE *fp;
+{
+  struct buffer buf;		/* Input buffer. */
+  struct lines lines;		/* Lines scanned from the buffer. */
+  struct line temp;		/* Copy of previous line. */
+  struct line *last;		/* Last line of the current buffer. */
+  int cc;			/* Character count. */
+  int cmp;			/* Result of calling compare. */
+  int alloc, i, success = 1;
+
+  initbuf (&buf, mergealloc);
+  initlines (&lines, mergealloc / linelength + 1,
+	     LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
+  alloc = linelength;
+  temp.text = xmalloc (alloc);
+  /* The saved copy has no precomputed key positions.  Null pointers
+     make keycompare work them out itself instead of reading
+     uninitialized members.  */
+  temp.keybeg = temp.keylim = NULL;
+
+  cc = fillbuf (&buf, fp);
+  findlines (&buf, &lines);
+
+  if (cc)
+    do
+      {
+	/* Compare each line in the buffer with its successor. */
+	for (i = 0; i < lines.used - 1; ++i)
+	  {
+	    cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
+	    if ((unique && cmp >= 0) || (cmp > 0))
+	      {
+		success = 0;
+		goto finish;
+	      }
+	  }
+
+	/* Save the last line of the buffer and refill the buffer.
+	   The copy includes the terminating NUL, so length + 1 bytes
+	   are needed; testing only `length > alloc' let a line of
+	   exactly ALLOC bytes overflow the copy by one byte.  */
+	last = &lines.lines[lines.used - 1];
+	if (last->length + 1 > alloc)
+	  {
+	    while (last->length + 1 > alloc)
+	      alloc *= 2;
+	    temp.text = xrealloc (temp.text, alloc);
+	  }
+	bcopy (last->text, temp.text, last->length + 1);
+	temp.length = last->length;
+
+	cc = fillbuf (&buf, fp);
+	if (cc)
+	  {
+	    findlines (&buf, &lines);
+	    /* Make sure the line saved from the old buffer contents is
+	       less than or equal to the first line of the new buffer. */
+	    cmp = compare (&temp, &lines.lines[0]);
+	    if ((unique && cmp >= 0) || (cmp > 0))
+	      {
+		success = 0;
+		break;
+	      }
+	  }
+      }
+    while (cc);
+
+finish:
+  xfclose (fp);
+  free (buf.buf);
+  free ((char *) lines.lines);
+  free (temp.text);
+  return success;
+}
+
+/* Merge lines from FPS onto OFP.  NFPS cannot be greater than NMERGE.
+   Close FPS before returning.
+
+   Each input gets its own buffer and line table.  ORD holds the
+   inputs ordered by their current line, so the next line to output
+   always comes from input ord[0].  When the unique flag is set, a
+   line is held back in SAVED and only written once a line that
+   compares different turns up (or the input runs out), so that just
+   the first line of every run of equal lines is output.
+
+   The array FPS is modified: entries are shifted down as inputs are
+   exhausted.  */
+
+static void
+mergefps (fps, nfps, ofp)
+     FILE *fps[], *ofp;
+     register int nfps;
+{
+  struct buffer buffer[NMERGE];	/* Input buffers for each file. */
+  struct lines lines[NMERGE];	/* Line tables for each buffer. */
+  struct line saved;		/* Saved line for unique check. */
+  int savedflag = 0;		/* True if there is a saved line. */
+  int savealloc;		/* Size allocated for the saved line. */
+  int cur[NMERGE];		/* Current line in each line table. */
+  int ord[NMERGE];		/* Table representing a permutation of fps,
+				   such that lines[ord[0]].lines[cur[ord[0]]]
+				   is the smallest line and will be next
+				   output. */
+  register int i, j, t;
+
+  /* Allocate space for a saved line if necessary. */
+  if (unique)
+    {
+      savealloc = linelength;
+      saved.text = xmalloc (savealloc);
+      /* The saved copy has no precomputed key positions.  Null
+	 pointers make keycompare work them out itself instead of
+	 reading uninitialized members.  */
+      saved.keybeg = saved.keylim = NULL;
+    }
+
+  /* Read initial lines from each input file. */
+  for (i = 0; i < nfps; ++i)
+    {
+      initbuf (&buffer[i], mergealloc);
+      /* If a file is empty, eliminate it from future consideration. */
+      while (i < nfps && !fillbuf (&buffer[i], fps[i]))
+	{
+	  xfclose (fps[i]);
+	  --nfps;
+	  for (j = i; j < nfps; ++j)
+	    fps[j] = fps[j + 1];
+	}
+      if (i == nfps)
+	/* Every remaining file was empty; this buffer is not needed. */
+	free (buffer[i].buf);
+      else
+	{
+	  initlines (&lines[i], mergealloc / linelength + 1,
+		     LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
+	  findlines (&buffer[i], &lines[i]);
+	  cur[i] = 0;
+	}
+    }
+
+  /* Set up the ord table according to comparisons among input lines.
+     Since this only reorders two items if one is strictly greater than
+     the other, it is stable.  After a swap the scan restarts from the
+     beginning (i is reset to 0 and the loop increment makes it 1).  */
+  for (i = 0; i < nfps; ++i)
+    ord[i] = i;
+  for (i = 1; i < nfps; ++i)
+    if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
+		 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
+      t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
+
+  /* Repeatedly output the smallest line until no input remains. */
+  while (nfps)
+    {
+      /* If uniqified output is turned on, output only the first of
+	 an identical series of lines. */
+      if (unique)
+	{
+	  if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
+	    {
+	      xfwrite (saved.text, 1, saved.length, ofp);
+	      putc ('\n', ofp);
+	      savedflag = 0;
+	    }
+	  if (!savedflag)
+	    {
+	      if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
+		{
+		  while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
+		    savealloc *= 2;
+		  saved.text = xrealloc (saved.text, savealloc);
+		}
+	      saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
+	      bcopy (lines[ord[0]].lines[cur[ord[0]]].text, saved.text,
+		     saved.length + 1);
+	      savedflag = 1;
+	    }
+	}
+      else
+	{
+	  xfwrite (lines[ord[0]].lines[cur[ord[0]]].text, 1,
+		   lines[ord[0]].lines[cur[ord[0]]].length, ofp);
+	  putc ('\n', ofp);
+	}
+
+      /* Check if we need to read more lines into core. */
+      if (++cur[ord[0]] == lines[ord[0]].used)
+	{
+	  if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
+	    {
+	      findlines (&buffer[ord[0]], &lines[ord[0]]);
+	      cur[ord[0]] = 0;
+	    }
+	  else
+	    {
+	      /* We reached EOF on fps[ord[0]].  Renumber the inputs
+		 that follow it, release its resources, close the gap
+		 in every per-input array and drop it from ord.  */
+	      for (i = 1; i < nfps; ++i)
+		if (ord[i] > ord[0])
+		  --ord[i];
+	      --nfps;
+	      xfclose (fps[ord[0]]);
+	      free (buffer[ord[0]].buf);
+	      free ((char *) lines[ord[0]].lines);
+	      for (i = ord[0]; i < nfps; ++i)
+		{
+		  fps[i] = fps[i + 1];
+		  buffer[i] = buffer[i + 1];
+		  lines[i] = lines[i + 1];
+		  cur[i] = cur[i + 1];
+		}
+	      for (i = 0; i < nfps; ++i)
+		ord[i] = ord[i + 1];
+	      continue;
+	    }
+	}
+
+      /* The new line just read in may be larger than other lines
+	 already in core; push it back in the queue until we encounter
+	 a line larger than it.  Ties are broken by input number, which
+	 keeps the merge stable.  */
+      for (i = 1; i < nfps; ++i)
+	{
+	  t = compare (&lines[ord[0]].lines[cur[ord[0]]],
+		       &lines[ord[i]].lines[cur[ord[i]]]);
+	  if (!t)
+	    t = ord[0] - ord[i];
+	  if (t < 0)
+	    break;
+	}
+      t = ord[0];
+      for (j = 1; j < i; ++j)
+	ord[j - 1] = ord[j];
+      ord[i - 1] = t;
+    }
+
+  if (unique)
+    {
+      /* Flush the line still being held back, if any.  */
+      if (savedflag)
+	{
+	  xfwrite (saved.text, 1, saved.length, ofp);
+	  putc ('\n', ofp);
+	}
+      /* Free the save area even when no line was ever saved (for
+	 example when every input was empty).  */
+      free (saved.text);
+    }
+}
+
+/* Sort the array LINES with NLINES members, using TEMP for temporary space. */
+
+static void
+sortlines (lines, nlines, temp)
+ struct line *lines, *temp;
+ int nlines;
+{
+ register struct line *lo, *hi, *t;
+ register int nlo, nhi;
+
+ /* Two elements: a single compare and, if needed, a swap through
+ *temp. */
+ if (nlines == 2)
+ {
+ if (compare (&lines[0], &lines[1]) > 0)
+ *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
+ return;
+ }
+
+ /* Split into a low half of nlines / 2 elements and a high half
+ with the rest. */
+ nlo = nlines / 2;
+ lo = lines;
+ nhi = nlines - nlo;
+ hi = lines + nlo;
+
+ /* Sort each half recursively; a half of one element or none is
+ already sorted. */
+ if (nlo > 1)
+ sortlines (lo, nlo, temp);
+
+ if (nhi > 1)
+ sortlines (hi, nhi, temp);
+
+ t = temp;
+
+ /* Merge into TEMP, taking from the low half on ties so that equal
+ lines keep their relative order. */
+ while (nlo && nhi)
+ if (compare (lo, hi) <= 0)
+ *t++ = *lo++, --nlo;
+ else
+ *t++ = *hi++, --nhi;
+ while (nlo--)
+ *t++ = *lo++;
+
+ /* Copy the merged elements back. Any high-half elements that were
+ not merged (nhi of them) are already in their final place at the
+ end of LINES, so only nlines - nhi elements are copied. */
+ for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
+ *lo++ = *t++;
+}
+
+/* Run checkfp over each of the NFILES FILES in turn, printing a
+   message on standard output for every file found to be out of
+   order.  Return the number of such files.  */
+
+static int
+check (files, nfiles)
+     char *files[];
+     int nfiles;
+{
+  int disorders = 0;
+  int i;
+
+  for (i = 0; i < nfiles; ++i)
+    {
+      if (checkfp (xfopen (files[i], "r")))
+	continue;
+
+      printf ("%s: disorder on %s\n", program_name, files[i]);
+      ++disorders;
+    }
+
+  return disorders;
+}
+
+/* Merge NFILES FILES onto OFP. */
+
+static void
+merge (files, nfiles, ofp)
+ char *files[];
+ int nfiles;
+ FILE *ofp;
+{
+ int i, j, t;
+ char *temp;
+ FILE *fps[NMERGE], *tfp;
+
+ /* While there are too many files to merge in one go, merge them in
+ groups of NMERGE into temporary files and continue with those.
+ FILES is overwritten in place with the names of the new temporary
+ files; T counts how many have been stored. */
+ while (nfiles > NMERGE)
+ {
+ t = 0;
+ for (i = 0; i < nfiles / NMERGE; ++i)
+ {
+ for (j = 0; j < NMERGE; ++j)
+ fps[j] = xfopen (files[i * NMERGE + j], "r");
+ tfp = xfopen (temp = tempname (), "w");
+ mergefps (fps, NMERGE, tfp);
+ xfclose (tfp);
+ /* zaptemp only removes names that are on the temporary file
+ list, so inputs that are not temporary files are left
+ alone. */
+ for (j = 0; j < NMERGE; ++j)
+ zaptemp (files[i * NMERGE + j]);
+ files[t++] = temp;
+ }
+ /* Merge the nfiles % NMERGE files left over after the full
+ groups. When that remainder is zero this still creates one
+ (empty) temporary file. */
+ for (j = 0; j < nfiles % NMERGE; ++j)
+ fps[j] = xfopen (files[i * NMERGE + j], "r");
+ tfp = xfopen (temp = tempname (), "w");
+ mergefps (fps, nfiles % NMERGE, tfp);
+ xfclose (tfp);
+ for (j = 0; j < nfiles % NMERGE; ++j)
+ zaptemp (files[i * NMERGE + j]);
+ files[t++] = temp;
+ nfiles = t;
+ }
+
+ /* At most NMERGE files remain: merge them straight onto OFP. */
+ for (i = 0; i < nfiles; ++i)
+ fps[i] = xfopen (files[i], "r");
+ mergefps (fps, i, ofp);
+ for (i = 0; i < nfiles; ++i)
+ zaptemp (files[i]);
+}
+
+/* Sort NFILES FILES onto OFP. */
+
+static void
+sort (files, nfiles, ofp)
+ char **files;
+ int nfiles;
+ FILE *ofp;
+{
+ struct buffer buf;
+ struct lines lines;
+ struct line *tmp;
+ int i, ntmp;
+ FILE *fp, *tfp;
+ struct tempnode *node;
+ int ntemp = 0;
+ char **tempfiles;
+
+ initbuf (&buf, sortalloc);
+ initlines (&lines, sortalloc / linelength + 1,
+ LINEALLOC / sizeof (struct line));
+ /* TMP is scratch space for sortlines; it is kept at least as large
+ as the number of lines in use. */
+ ntmp = lines.alloc;
+ tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
+
+ while (nfiles--)
+ {
+ fp = xfopen (*files++, "r");
+ /* Sort one bufferful of lines at a time. */
+ while (fillbuf (&buf, fp))
+ {
+ findlines (&buf, &lines);
+ if (lines.used > ntmp)
+ {
+ while (lines.used > ntmp)
+ ntmp *= 2;
+ tmp = (struct line *)
+ xrealloc ((char *) tmp, ntmp * sizeof (struct line));
+ }
+ sortlines (lines.lines, lines.used, tmp);
+ /* If this bufferful is the entire input (last file, at EOF,
+ nothing left unparsed, no temporary files written so far),
+ write it directly to OFP; otherwise write it to a new
+ temporary file to be merged later. */
+ if (feof (fp) && !nfiles && !ntemp && !buf.left)
+ tfp = ofp;
+ else
+ {
+ ++ntemp;
+ tfp = xfopen (tempname (), "w");
+ }
+ /* With the unique flag set, skip every line that compares
+ equal to the line before it. */
+ for (i = 0; i < lines.used; ++i)
+ if (!unique || i == 0
+ || compare (&lines.lines[i], &lines.lines[i - 1]))
+ {
+ xfwrite (lines.lines[i].text, 1, lines.lines[i].length, tfp);
+ putc ('\n', tfp);
+ }
+ if (tfp != ofp)
+ xfclose (tfp);
+ }
+ xfclose (fp);
+ }
+
+ free (buf.buf);
+ free ((char *) lines.lines);
+ free ((char *) tmp);
+
+ if (ntemp)
+ {
+ /* The temporary file list runs from newest to oldest, so fill
+ the array from the back to hand merge the files in the order
+ in which they were created. */
+ tempfiles = (char **) xmalloc (ntemp * sizeof (char *));
+ i = ntemp;
+ for (node = temphead.next; node; node = node->next)
+ tempfiles[--i] = node->name;
+ merge (tempfiles, ntemp, ofp);
+ free ((char *) tempfiles);
+ }
+}
+
+/* Append KEY to the list of keys headed by `keyhead', making it the
+   last key to be tried.  */
+
+static void
+insertkey (key)
+     struct keyfield *key;
+{
+  /* TAIL walks the chain of `next' pointers until it reaches the
+     null one that ends the list.  */
+  struct keyfield **tail = &keyhead.next;
+
+  while (*tail)
+    tail = &(*tail)->next;
+
+  *tail = key;
+  key->next = NULL;
+}
+
+/* Report S as an invalid field specification through error, passing
+ it a status of 2. */
+static void
+badfieldspec (s)
+ char *s;
+{
+ error (2, 0, "invalid field specification `%s'", s);
+}
+
+/* Handle interrupts and hangups. */
+
+static void
+sighandler (sig)
+ int sig;
+{
+ /* First restore the default action for SIG, so that re-sending it
+ below is not caught by this handler again. */
+#ifdef _POSIX_VERSION
+ struct sigaction sigact;
+
+ sigact.sa_handler = SIG_DFL;
+ sigemptyset (&sigact.sa_mask);
+ sigact.sa_flags = 0;
+ sigaction (sig, &sigact, NULL);
+#else /* !_POSIX_VERSION */
+ signal (sig, SIG_DFL);
+#endif /* _POSIX_VERSION */
+ /* Remove the temporary files, then send SIG to this process again
+ now that its default action is back in place. */
+ cleanup ();
+ kill (getpid (), sig);
+}
+
+/* Apply to KEY each ordering option letter found at the start of S,
+   stopping at the first character that is not one.  Return a pointer
+   to that character (or to the terminating NUL if all of S was
+   consumed).
+   BLANKTYPE says which end(s) of the field a 'b' applies to.  */
+
+static char *
+set_ordering (s, key, blanktype)
+     register char *s;
+     struct keyfield *key;
+     enum blanktype blanktype;
+{
+  for (; *s; ++s)
+    switch (*s)
+      {
+      case 'b':
+	if (blanktype == bl_both || blanktype == bl_start)
+	  key->skipsblanks = 1;
+	if (blanktype == bl_both || blanktype == bl_end)
+	  key->skipeblanks = 1;
+	break;
+
+      case 'd':
+	key->ignore = nondictionary;
+	break;
+
+      case 'f':
+	key->translate = fold_toupper;
+	break;
+
+#if 0
+      case 'g':
+	/* Reserved for comparing floating-point numbers. */
+	break;
+#endif
+
+      case 'i':
+	key->ignore = nonprinting;
+	break;
+
+      case 'M':
+	/* Month comparison implies skipping blanks at both ends.  */
+	key->skipsblanks = key->skipeblanks = key->month = 1;
+	break;
+
+      case 'n':
+	/* Numeric comparison implies skipping blanks at both ends.  */
+	key->skipsblanks = key->skipeblanks = key->numeric = 1;
+	break;
+
+      case 'r':
+	key->reverse = 1;
+	break;
+
+      default:
+	/* Not an ordering option: leave it for the caller.  */
+	return s;
+      }
+
+  return s;
+}
+
+void
+main (argc, argv)
+ int argc;
+ char *argv[];
+{
+ struct keyfield *key = NULL, gkey; /* KEY: key spec being built; GKEY: global ordering options. */
+ char *s;
+ int i, t, t2;
+ int checkonly = 0, mergeonly = 0, nfiles = 0;
+ char *minus = "-", *outfile = minus, **files, *tmp;
+ FILE *ofp;
+#ifdef _POSIX_VERSION
+ struct sigaction oldact, newact;
+#endif /* _POSIX_VERSION */
+ /* Parse the arguments, then check, merge or sort the named files. */
+ program_name = argv[0];
+ have_read_stdin = 0;
+ inittables ();
+
+ prefix = getenv ("TMPDIR");
+ if (prefix == NULL)
+ prefix = "/tmp";
+ /* Install `sighandler' for each signal that is not currently ignored. */
+#ifdef _POSIX_VERSION
+ newact.sa_handler = sighandler;
+ sigemptyset (&newact.sa_mask);
+ newact.sa_flags = 0;
+
+ sigaction (SIGINT, NULL, &oldact);
+ if (oldact.sa_handler != SIG_IGN)
+ sigaction (SIGINT, &newact, NULL);
+ sigaction (SIGHUP, NULL, &oldact);
+ if (oldact.sa_handler != SIG_IGN)
+ sigaction (SIGHUP, &newact, NULL);
+ sigaction (SIGPIPE, NULL, &oldact);
+ if (oldact.sa_handler != SIG_IGN)
+ sigaction (SIGPIPE, &newact, NULL);
+ sigaction (SIGTERM, NULL, &oldact);
+ if (oldact.sa_handler != SIG_IGN)
+ sigaction (SIGTERM, &newact, NULL);
+#else /* !_POSIX_VERSION */
+ if (signal (SIGINT, SIG_IGN) != SIG_IGN)
+ signal (SIGINT, sighandler);
+ if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
+ signal (SIGHUP, sighandler);
+ if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
+ signal (SIGPIPE, sighandler);
+ if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
+ signal (SIGTERM, sighandler);
+#endif /* !_POSIX_VERSION */
+ /* Start with no global ordering options. */
+ gkey.sword = gkey.eword = -1;
+ gkey.ignore = NULL;
+ gkey.translate = NULL;
+ gkey.numeric = gkey.month = gkey.reverse = 0;
+ gkey.skipsblanks = gkey.skipeblanks = 0;
+
+ files = (char **) xmalloc (sizeof (char *) * argc);
+ /* Classify each argument: +POS1, -POS2 or -options, or a file name. */
+ for (i = 1; i < argc; ++i)
+ {
+ if (argv[i][0] == '+')
+ {
+ if (key) /* A +POS1 with no -POS2 is still pending; queue it. */
+ insertkey (key);
+ key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
+ key->eword = -1;
+ key->ignore = NULL;
+ key->translate = NULL;
+ key->skipsblanks = key->skipeblanks = 0;
+ key->numeric = key->month = key->reverse = 0;
+ s = argv[i] + 1;
+ if (!digits[UCHAR (*s)])
+ badfieldspec (argv[i]);
+ for (t = 0; digits[UCHAR (*s)]; ++s)
+ t = 10 * t + *s - '0';
+ t2 = 0;
+ if (*s == '.')
+ for (++s; digits[UCHAR (*s)]; ++s)
+ t2 = 10 * t2 + *s - '0';
+ if (t2 || t)
+ {
+ key->sword = t;
+ key->schar = t2;
+ }
+ else
+ key->sword = -1; /* +0 (and +0.0) is stored as sword == -1. */
+ s = set_ordering (s, key, bl_start);
+ if (*s)
+ badfieldspec (argv[i]);
+ }
+ else if (argv[i][0] == '-' && argv[i][1]) /* A lone "-" is treated as a file name below. */
+ {
+ s = argv[i] + 1;
+ if (digits[UCHAR (*s)])
+ {
+ if (!key) /* -POS2 is only accepted after a +POS1. */
+ usage ();
+ for (t = 0; digits[UCHAR (*s)]; ++s)
+ t = t * 10 + *s - '0';
+ t2 = 0;
+ if (*s == '.')
+ for (++s; digits[UCHAR (*s)]; ++s)
+ t2 = t2 * 10 + *s - '0';
+ key->eword = t;
+ key->echar = t2;
+ s = set_ordering (s, key, bl_end);
+ if (*s)
+ badfieldspec (argv[i]);
+ insertkey (key);
+ key = NULL;
+ }
+ else
+ while (*s)
+ {
+ s = set_ordering (s, &gkey, bl_both); /* Ordering letters here apply globally. */
+ switch (*s)
+ {
+ case '\0':
+ break;
+ case 'c':
+ checkonly = 1;
+ break;
+ case 'k':
+ if (s[1]) /* The key spec follows in the same argument... */
+ ++s;
+ else
+ {
+ if (i == argc - 1)
+ error (2, 0, "option `-k' requires an argument");
+ else
+ s = argv[++i]; /* ...or is the next argument. */
+ }
+ if (key)
+ insertkey (key);
+ key = (struct keyfield *)
+ xmalloc (sizeof (struct keyfield));
+ key->eword = -1;
+ key->ignore = NULL;
+ key->translate = NULL;
+ key->skipsblanks = key->skipeblanks = 0;
+ key->numeric = key->month = key->reverse = 0;
+ /* Get POS1. */
+ if (!digits[UCHAR (*s)])
+ badfieldspec (argv[i]);
+ for (t = 0; digits[UCHAR (*s)]; ++s)
+ t = 10 * t + *s - '0';
+ if (t) /* A nonzero field number is reduced by one before it is stored. */
+ t--;
+ t2 = 0;
+ if (*s == '.')
+ {
+ for (++s; digits[UCHAR (*s)]; ++s)
+ t2 = 10 * t2 + *s - '0';
+ if (t2) /* Likewise for a nonzero character offset. */
+ t2--;
+ }
+ if (t2 || t)
+ {
+ key->sword = t;
+ key->schar = t2;
+ }
+ else
+ key->sword = -1;
+ s = set_ordering (s, key, bl_start);
+ if (*s && *s != ',')
+ badfieldspec (argv[i]);
+ else if (*s++) /* A comma introduces POS2. */
+ {
+ /* Get POS2. */
+ for (t = 0; digits[UCHAR (*s)]; ++s)
+ t = t * 10 + *s - '0';
+ t2 = 0;
+ if (*s == '.')
+ {
+ for (++s; digits[UCHAR (*s)]; ++s)
+ t2 = t2 * 10 + *s - '0';
+ if (t2)
+ t--; /* NOTE(review): adjusts the field T, not T2, unlike POS1 above -- confirm intended. */
+ }
+ key->eword = t;
+ key->echar = t2;
+ s = set_ordering (s, key, bl_end);
+ if (*s)
+ badfieldspec (argv[i]);
+ }
+ insertkey (key);
+ key = NULL;
+ goto outer; /* The rest of this argument has been consumed. */
+ case 'm':
+ mergeonly = 1;
+ break;
+ case 'o':
+ if (s[1])
+ outfile = s + 1;
+ else
+ {
+ if (i == argc - 1)
+ error (2, 0, "option `-o' requires an argument");
+ else
+ outfile = argv[++i];
+ }
+ goto outer;
+ case 's':
+ stable = 1;
+ break;
+ case 't':
+ if (s[1])
+ tab = *++s; /* Only the first character of the argument is used. */
+ else if (i < argc - 1)
+ {
+ tab = *argv[++i];
+ goto outer;
+ }
+ else
+ error (2, 0, "option `-t' requires an argument");
+ break;
+ case 'u':
+ unique = 1;
+ break;
+ default:
+ fprintf (stderr, "%s: unrecognized option `-%c'\n",
+ argv[0], *s);
+ usage ();
+ }
+ if (*s)
+ ++s;
+ }
+ }
+ else /* Not an option. */
+ {
+ files[nfiles++] = argv[i];
+ }
+ outer:;
+ }
+
+ if (key) /* Queue a trailing +POS1 that had no -POS2. */
+ insertkey (key);
+
+ /* Inheritance of global options to individual keys. */
+ for (key = keyhead.next; key; key = key->next)
+ if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
+ && !key->skipeblanks && !key->month && !key->numeric)
+ {
+ key->ignore = gkey.ignore;
+ key->translate = gkey.translate;
+ key->skipsblanks = gkey.skipsblanks;
+ key->skipeblanks = gkey.skipeblanks;
+ key->month = gkey.month;
+ key->numeric = gkey.numeric;
+ key->reverse = gkey.reverse;
+ }
+ /* No keys were given but global options were: use GKEY as the only key. */
+ if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
+ || gkey.reverse || gkey.skipeblanks
+ || gkey.month || gkey.numeric))
+ insertkey (&gkey);
+
+ if (nfiles == 0) /* No file operands: read the standard input. */
+ {
+ nfiles = 1;
+ files = &minus;
+ }
+
+ if (checkonly)
+ exit (check (files, nfiles) != 0);
+ /* If the output file is also an input, copy it to a temporary first. */
+ if (strcmp (outfile, "-"))
+ {
+ for (i = 0; i < nfiles; ++i)
+ if (!strcmp (outfile, files[i]))
+ break;
+ if (i == nfiles)
+ ofp = xfopen (outfile, "w");
+ else
+ {
+ char buf[8192];
+ FILE *fp = xfopen (outfile, "r");
+ int cc;
+
+ tmp = tempname ();
+ ofp = xfopen (tmp, "w");
+ while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
+ xfwrite (buf, 1, cc, ofp);
+ if (ferror (fp))
+ {
+ error (0, errno, "%s", outfile);
+ cleanup ();
+ exit (2);
+ }
+ xfclose (ofp);
+ xfclose (fp);
+ files[i] = tmp; /* Read the copy in place of the original. */
+ ofp = xfopen (outfile, "w"); /* The original can now be overwritten. */
+ }
+ }
+ else
+ ofp = stdout;
+
+ if (mergeonly)
+ merge (files, nfiles, ofp);
+ else
+ sort (files, nfiles, ofp);
+ cleanup ();
+ /* NOTE(review): OFP is not closed or checked here when it is not stdout -- confirm merge/sort do so. */
+ if (have_read_stdin && fclose (stdin) == EOF)
+ error (1, errno, "-");
+ if (ferror (stdout) || fclose (stdout) == EOF)
+ error (1, 0, "write error");
+
+ exit (0);
+}
+
+static void
+usage ()
+{ /* Print the command synopsis on stderr and exit with status 2. */
+ fprintf (stderr, "\
+Usage: %s [-cmus] [-t separator] [-o output-file] [-bdfiMnr] [+POS1 [-POS2]]\n\
+ [-k POS1[,POS2]] [file...]\n",
+ program_name);
+ exit (2);
+}
diff --git a/src/split.c b/src/split.c
new file mode 100644
index 000000000..ccc4535c2
--- /dev/null
+++ b/src/split.c
@@ -0,0 +1,532 @@
+/* split.c -- split a file into pieces.
+ Copyright (C) 1988, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* By tege@sics.se, with rms.
+
+ To do:
+ * Implement -t CHAR or -t REGEX to specify break characters other
+ than newline. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include "system.h"
+
+char *xmalloc ();
+void error ();
+
+int convint ();
+int isdigits ();
+int stdread ();
+void line_bytes_split ();
+void bytes_split ();
+void cwrite ();
+void lines_split ();
+void next_file_name ();
+
+/* Name under which this program was invoked. */
+char *program_name;
+
+/* Base name of output files. */
+char *outfile;
+
+/* Pointer to the end of the prefix in OUTFILE.
+ Suffixes are inserted here. */
+char *outfile_mid;
+
+/* Pointer to the end of OUTFILE. */
+char *outfile_end;
+
+/* Status for outfile name generation; all three are updated by next_file_name. */
+unsigned outfile_count = -1; /* Incremented before each name is made, so the first call wraps it to 0. */
+unsigned outfile_name_limit = 25 * 26; /* Count at which the suffix is lengthened; multiplied by 26 each time. */
+unsigned outfile_name_generation = 1; /* Incremented each time the suffix is lengthened. */
+
+/* Name of input file. May be "-". */
+char *infile;
+
+/* Descriptor on which input file is open. */
+int input_desc;
+
+/* Descriptor on which output file is open. */
+int output_desc;
+
+void
+usage (reason)
+ char *reason;
+{ /* Print REASON, if it is not NULL, and the synopsis on stderr; exit with status 2. */
+ if (reason != NULL)
+ fprintf (stderr, "%s: %s\n", program_name, reason);
+ fprintf (stderr, "\
+Usage: %s [-lines] [-l lines] [-b bytes[bkm]] [-C bytes[bkm]]\n\
+ [--lines=lines] [--bytes=bytes[bkm]] [--line-bytes=bytes[bkm]]\n\
+ [infile [outfile-prefix]]\n",
+ program_name);
+ exit (2);
+}
+
+struct option longopts[] = /* Long options; each takes an argument and maps to a short option. */
+{
+ {"bytes", 1, NULL, 'b'}, /* --bytes=N is handled as -b N */
+ {"lines", 1, NULL, 'l'}, /* --lines=N is handled as -l N */
+ {"line-bytes", 1, NULL, 'C'}, /* --line-bytes=N is handled as -C N */
+ {NULL, 0, NULL, 0} /* end of table */
+};
+
+void
+main (argc, argv)
+ int argc;
+ char *argv[];
+{
+ struct stat stat_buf;
+ int num; /* numeric argument from command line */
+ enum
+ {
+ type_undef, type_bytes, type_byteslines, type_lines, type_digits
+ } split_type = type_undef;
+ int in_blk_size; /* optimal block size of input file device */
+ char *buf; /* file i/o buffer */
+ int accum = 0; /* size argument as accumulated during option parsing */
+ char *outbase;
+ int c;
+ int digits_optind = 0; /* argv index of the -DIGITS argument seen last, or 0 */
+ /* Parse the options, open the input, and run the selected splitter. */
+ program_name = argv[0];
+
+ /* Parse command line options. */
+
+ infile = "-";
+ outbase = "x";
+
+ while (1)
+ {
+ /* This is the argv-index of the option we will read next. */
+ int this_optind = optind ? optind : 1;
+
+ c = getopt_long (argc, argv, "0123456789b:l:C:", longopts, (int *) 0);
+ if (c == EOF)
+ break;
+
+ switch (c)
+ {
+ case 'b':
+ if (split_type != type_undef)
+ usage ("cannot split in more than one way");
+ split_type = type_bytes;
+ if (convint (optarg, &accum) == -1)
+ usage ("invalid number of bytes");
+ break;
+
+ case 'l':
+ if (split_type != type_undef)
+ usage ("cannot split in more than one way");
+ split_type = type_lines;
+ if (!isdigits (optarg))
+ usage ("invalid number of lines");
+ accum = atoi (optarg);
+ break;
+
+ case 'C':
+ if (split_type != type_undef)
+ usage ("cannot split in more than one way");
+ split_type = type_byteslines;
+ if (convint (optarg, &accum) == -1)
+ usage ("invalid number of bytes");
+ break;
+
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (split_type != type_undef && split_type != type_digits)
+ usage ("cannot split in more than one way");
+ if (digits_optind != 0 && digits_optind != this_optind)
+ accum = 0; /* More than one number given; ignore other. */
+ digits_optind = this_optind;
+ split_type = type_digits;
+ accum = accum * 10 + c - '0'; /* Each digit arrives as its own option character. */
+ break;
+
+ default:
+ usage ((char *)0);
+ }
+ }
+
+ /* Handle default case. */
+ if (split_type == type_undef)
+ {
+ split_type = type_lines;
+ accum = 1000;
+ }
+
+ if (accum < 1)
+ usage ("invalid number");
+ num = accum;
+
+ /* Get out the filename arguments. */
+
+ if (optind < argc)
+ infile = argv[optind++];
+
+ if (optind < argc)
+ outbase = argv[optind++];
+
+ if (optind < argc)
+ usage ("too many arguments");
+
+ /* Open the input file. */
+ if (!strcmp (infile, "-"))
+ input_desc = 0; /* "-" means descriptor 0, the standard input. */
+ else
+ {
+ input_desc = open (infile, O_RDONLY);
+ if (input_desc < 0)
+ error (1, errno, "%s", infile);
+ }
+
+ /* No output file is open now. */
+ output_desc = -1;
+
+ /* Copy the output file prefix so we can add suffixes to it.
+ 26**29 is certainly enough output files! */
+
+ outfile = xmalloc (strlen (outbase) + 30);
+ strcpy (outfile, outbase);
+ outfile_mid = outfile + strlen (outfile);
+ outfile_end = outfile_mid + 2; /* The suffix starts out two characters long. */
+ bzero (outfile_mid, 30);
+ outfile_mid[0] = 'a';
+ outfile_mid[1] = 'a' - 1; /* first call to next_file_name makes it an 'a' */
+
+ /* Get the optimal block size of input device and make a buffer. */
+
+ if (fstat (input_desc, &stat_buf) < 0)
+ error (1, errno, "%s", infile);
+ in_blk_size = ST_BLKSIZE (stat_buf);
+
+ buf = xmalloc (in_blk_size + 1); /* One extra byte: lines_split stores a newline after the data. */
+
+ switch (split_type)
+ {
+ case type_digits:
+ case type_lines:
+ lines_split (num, buf, in_blk_size);
+ break;
+
+ case type_bytes:
+ bytes_split (num, buf, in_blk_size);
+ break;
+
+ case type_byteslines:
+ line_bytes_split (num); /* Allocates its own NUM-byte buffer instead of using BUF. */
+ break;
+ }
+
+ if (close (input_desc) < 0)
+ error (1, errno, "%s", infile);
+ if (output_desc >= 0 && close (output_desc) < 0)
+ error (1, errno, "%s", outfile);
+
+ exit (0);
+}
+
+/* Return nonzero if the string STR is nonempty and composed entirely of
+   decimal digits.  */
+
+int
+isdigits (str)
+     char *str;
+{
+  if (*str == '\0')
+    return 0;			/* The empty string is not a number.  */
+  /* Cast: <ctype.h> functions are undefined for negative char values.  */
+  for (; *str != '\0'; str++)
+    if (!isdigit ((unsigned char) *str))
+      return 0;
+  return 1;
+}
+
+/* Put the value of the number in STR into *VAL.
+   STR is a string of decimal digits optionally ending in `b' (x512),
+   `k' (x1024) or `m' (x1048576); the suffix is overwritten with a NUL.
+   Return 0 if STR is valid, -1 if it is malformed or too big for an int.  */
+
+int
+convint (str, val)
+     char *str;
+     int *val;
+{
+  unsigned max = (unsigned) -1 >> 1;	/* Assumed to equal INT_MAX.  */
+  unsigned multiplier = 1, n = 0;
+  int arglen = strlen (str);
+
+  if (arglen > 1)
+    {
+      switch (str[arglen - 1])
+        {
+        case 'b': multiplier = 512; break;
+        case 'k': multiplier = 1024; break;
+        case 'm': multiplier = 1048576; break;
+        }
+      if (multiplier != 1)
+        str[arglen - 1] = '\0';
+    }
+  if (*str == '\0')
+    return -1;
+  /* Accept each digit only while N stays within MAX / MULTIPLIER.  */
+  for (max /= multiplier; *str != '\0'; str++)
+    if (!isdigit ((unsigned char) *str) || n > (max - (*str - '0')) / 10)
+      return -1;
+    else
+      n = n * 10 + (*str - '0');
+  *val = n * multiplier;
+  return 0;
+}
+
+/* Split into pieces of exactly NCHARS bytes; only the last piece may be
+   shorter.  Use buffer BUF, whose size is BUFSIZE.  Reading stops after
+   the first read that does not fill BUF.  */
+
+void
+bytes_split (nchars, buf, bufsize)
+     int nchars;
+     char *buf;
+     int bufsize;
+{
+  int got;			/* Bytes obtained by the latest read.  */
+  int left;			/* Bytes of BUF not yet written out.  */
+  int room = nchars;		/* Bytes the current piece can still take.  */
+  int start_new = 1;		/* Nonzero if the next write starts a piece.  */
+  char *src;			/* First byte of BUF not yet written out.  */
+
+  do
+    {
+      got = stdread (buf, bufsize);
+      if (got < 0)
+        error (1, errno, "%s", infile);
+
+      src = buf;
+      left = got;
+
+      /* Write out every piece that this buffer completes.  */
+      while (left >= room)
+        {
+          cwrite (start_new, src, room);
+          src += room;
+          left -= room;
+          room = nchars;
+          start_new = 1;
+        }
+
+      /* Whatever remains begins or continues a piece that a later buffer
+         will complete.  Do not write 0 bytes!  */
+      if (left != 0)
+        {
+          cwrite (start_new, src, left);
+          room -= left;
+          start_new = 0;
+        }
+    }
+  while (got == bufsize);
+}
+
+/* Split into pieces of exactly NLINES lines.  Use buffer BUF, whose size
+   is BUFSIZE plus one spare byte that holds a newline sentinel.  */
+
+void
+lines_split (nlines, buf, bufsize)
+     int nlines;
+     char *buf;
+     int bufsize;
+{
+  int got;			/* Bytes obtained by the latest read.  */
+  char *scan, *pending, *limit;	/* PENDING: first byte not yet written.  */
+  int start_new = 1;		/* Nonzero if the next write starts a piece.  */
+  int count = 0;		/* Newlines seen since the last piece ended.  */
+
+  do
+    {
+      got = stdread (buf, bufsize);
+      if (got < 0)
+        error (1, errno, "%s", infile);
+      pending = buf;
+      limit = buf + got;
+      *limit = '\n';		/* Sentinel: the scan below always stops.  */
+      for (scan = buf;;)
+        {
+          while (*scan != '\n')
+            scan++;
+          scan++;		/* Step past the newline just found.  */
+          if (scan > limit)
+            {
+              /* That was the sentinel; write the unfinished tail, if any.  */
+              if (limit != pending)
+                {
+                  cwrite (start_new, pending, limit - pending);
+                  start_new = 0;
+                }
+              break;
+            }
+          if (++count < nlines)
+            continue;
+          cwrite (start_new, pending, scan - pending);
+          pending = scan;
+          start_new = 1;
+          count = 0;
+        }
+    }
+  while (got == bufsize);
+}
+
+/* Split into pieces that are as large as possible while still not more
+   than NCHARS bytes, breaking only at line boundaries unless a line is
+   longer than NCHARS bytes.  Never create an empty output file.  */
+
+void
+line_bytes_split (nchars)
+     int nchars;
+{
+  int n_read;
+  char *bp;
+  int eof = 0;
+  int n_buffered = 0;
+  char *buf = (char *) xmalloc (nchars);
+
+  do
+    {
+      /* Fill up the full buffer size from the input file.  */
+      n_read = stdread (buf + n_buffered, nchars - n_buffered);
+      if (n_read < 0)
+        error (1, errno, "%s", infile);
+
+      n_buffered += n_read;
+      if (n_buffered != nchars)
+        {
+          /* End of input.  If no bytes are pending, stop now rather
+             than have cwrite create an output file that stays empty.  */
+          if (n_buffered == 0)
+            break;
+          eof = 1;
+        }
+
+      /* Find where to end this chunk: after the last newline of a full
+         buffer; at the end of the data if there is none or at EOF.  */
+      bp = buf + n_buffered;
+      if (n_buffered == nchars)
+        while (bp > buf && bp[-1] != '\n')
+          bp--;
+      if (bp == buf)
+        bp = buf + n_buffered;
+      /* Output the chars as one output file.  */
+      cwrite (1, buf, bp - buf);
+
+      /* Move the rest of the chunk down to start the next chunk.  */
+      n_buffered -= bp - buf;
+      if (n_buffered > 0)
+        bcopy (bp, buf, n_buffered);
+    }
+  while (!eof);
+  free (buf);
+}
+
+/* Write BYTES bytes at BP to an output file, retrying after short writes.
+   If NEW_FILE_FLAG is nonzero, close the file in use and open the next.  */
+
+void
+cwrite (new_file_flag, bp, bytes)
+     int new_file_flag;
+     char *bp;
+     int bytes;
+{
+  int n_written;
+  if (new_file_flag)
+    {
+      if (output_desc >= 0 && close (output_desc) < 0)
+        error (1, errno, "%s", outfile);
+      next_file_name ();
+      output_desc = open (outfile, O_WRONLY | O_CREAT | O_TRUNC, 0666);
+      if (output_desc < 0)
+        error (1, errno, "%s", outfile);
+    }
+  for (; bytes > 0; bp += n_written, bytes -= n_written) /* write may be partial */
+    if ((n_written = write (output_desc, bp, bytes)) < 0)
+      error (1, errno, "%s", outfile);
+}
+
+/* Read from the input file into BUF until NCHARS bytes have arrived or
+   end of file is reached.  Return the number of bytes stored in BUF, or
+   -1 on a read error.  If this is less than NCHARS, do not call
+   `stdread' again.  */
+
+int
+stdread (buf, nchars)
+     char *buf;
+     int nchars;
+{
+  int total = 0;
+
+  /* A single read may deliver fewer bytes than were asked for.  */
+  while (total != nchars)
+    {
+      int got = read (input_desc, buf + total, nchars - total);
+
+      if (got < 0)
+        return -1;
+      if (got == 0)
+        break;			/* End of file.  */
+      total += got;
+    }
+  return total;
+}
+
+/* Compute the next sequential output file name suffix and store it
+ into the string `outfile' at the position pointed to by `outfile_mid'. */
+
+void
+next_file_name ()
+{
+ int x;
+ char *ne;
+
+ outfile_count++; /* The initial value, (unsigned) -1, wraps to 0 on the first call. */
+ if (outfile_count < outfile_name_limit) /* Names of the current length remain. */
+ {
+ for (ne = outfile_end - 1; ; ne--) /* Increment the suffix, rightmost letter first. */
+ {
+ x = *ne;
+ if (x != 'z')
+ break;
+ *ne = 'a'; /* A 'z' rolls over to 'a' and the carry moves left. */
+ }
+ *ne = x + 1;
+ return;
+ }
+ /* This length is used up: add a 'z' to the prefix and restart with a longer suffix of 'a's. */
+ outfile_count = 0;
+ outfile_name_limit *= 26;
+ outfile_name_generation++;
+ *outfile_mid++ = 'z';
+ for (x = 0; x <= outfile_name_generation; x++)
+ outfile_mid[x] = 'a';
+ outfile_end += 2; /* One more prefix 'z' plus one more suffix letter. */
+}
diff --git a/src/sum.c b/src/sum.c
new file mode 100644
index 000000000..9236614ca
--- /dev/null
+++ b/src/sum.c
@@ -0,0 +1,217 @@
+/* sum -- checksum and count the blocks in a file
+ Copyright (C) 1986, 1989, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Like BSD sum or SysV sum -r, except like SysV sum if -s option is given. */
+
+/* Written by Kayvan Aghaiepour and David MacKenzie. */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <getopt.h>
+#include "system.h"
+
+int bsd_sum_file ();
+int sysv_sum_file ();
+void error ();
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Nonzero if any of the files read were the standard input. */
+int have_read_stdin;
+
+/* Right-rotate 32-bit integer variable C. */
+#define ROTATE_RIGHT(c) if ((c) & 01) (c) = ((c) >>1) + 0x8000; else (c) >>= 1;
+
+struct option longopts[] = /* Long options accepted in addition to "rs". */
+{
+ {"sysv", 0, NULL, 's'}, /* --sysv takes no argument and is handled as -s */
+ {NULL, 0, NULL, 0} /* end of table */
+};
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int errors = 0;		/* Exit status: 1 if any file failed.  */
+  int optc;
+  int files_given;		/* Number of file operands.  */
+  int (*sum_func) () = bsd_sum_file;	/* The BSD algorithm is the default.  */
+
+  program_name = argv[0];
+  have_read_stdin = 0;
+
+  while ((optc = getopt_long (argc, argv, "rs", longopts, (int *) 0)) != -1)
+    switch (optc)
+      {
+      case 'r':			/* For SysV compatibility.  */
+        sum_func = bsd_sum_file;
+        break;
+
+      case 's':
+        sum_func = sysv_sum_file;
+        break;
+
+      case '?':
+        fprintf (stderr, "\
+Usage: %s [-rs] [--sysv] [file...]\n", argv[0]);
+        exit (1);
+      }
+
+  /* With no operands, sum the standard input; else sum each operand.  */
+  files_given = argc - optind;
+  if (files_given == 0)
+    {
+      if ((*sum_func) ("-", files_given) < 0)
+        errors = 1;
+    }
+  else
+    for (; optind < argc; optind++)
+      if ((*sum_func) (argv[optind], files_given) < 0)
+        errors = 1;
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    error (1, errno, "-");
+  /* The sums are printed with stdio; make sure they really got out.  */
+  if (ferror (stdout) || fclose (stdout) == EOF)
+    error (1, 0, "write error");
+  exit (errors);
+}
+
+/* Calculate and print the rotated (BSD-style) checksum and the size in
+   1K blocks, rounded up, of file FILE, or of the standard input if
+   FILE is "-".
+   If PRINT_NAME is >1, print FILE next to the checksum and size.
+   Return 0 if successful; if FILE cannot be opened, read or closed,
+   report the error and return -1.  */
+
+int
+bsd_sum_file (file, print_name)
+     char *file;
+     int print_name;
+{
+  register FILE *in;
+  register unsigned long sum = 0;	/* The checksum mod 2^16.  */
+  register long nbytes = 0;		/* The number of bytes.  */
+  register int c;			/* Each character read.  */
+  int from_stdin = !strcmp (file, "-");
+
+  if (from_stdin)
+    {
+      have_read_stdin = 1;
+      in = stdin;
+    }
+  else if ((in = fopen (file, "r")) == NULL)
+    {
+      error (0, errno, "%s", file);
+      return -1;
+    }
+
+  /* For each byte: rotate the 16-bit sum right by one bit, add the
+     byte, and discard any carry out of bit 15.  */
+  for (c = getc (in); c != EOF; c = getc (in))
+    {
+      ++nbytes;
+      ROTATE_RIGHT (sum);
+      sum = (sum + c) & 0xffff;
+    }
+
+  /* getc returns EOF on a read error as well as at end of file.  */
+  if (ferror (in))
+    {
+      error (0, errno, "%s", file);
+      if (!from_stdin)
+        fclose (in);
+      return -1;
+    }
+
+  /* The standard input is left open for the caller.  */
+  if (!from_stdin && fclose (in) == EOF)
+    {
+      error (0, errno, "%s", file);
+      return -1;
+    }
+
+  /* The block count is rounded up to a whole number of kilobytes.  */
+  printf ("%05lu %5ld", sum, (nbytes + 1024 - 1) / 1024);
+  if (print_name > 1)
+    printf (" %s", file);
+  putchar ('\n');
+
+  return 0;
+}
+
+/* Calculate and print the checksum and the size in 512-byte blocks
+   of file FILE, or of the standard input if FILE is "-".
+   If PRINT_NAME is >0, print FILE next to the checksum and size.
+   Return 0 if successful, -1 if an error occurs.  */
+
+int
+sysv_sum_file (file, print_name)
+     char *file;
+     int print_name;
+{
+  int fd;
+  unsigned char buf[8192];
+  register int bytes_read, i;
+  register unsigned long checksum = 0, r;	/* CHECKSUM: sum of all bytes.  */
+  long total_bytes = 0;
+
+  if (!strcmp (file, "-"))
+    {
+      fd = 0;
+      have_read_stdin = 1;
+    }
+  else
+    {
+      fd = open (file, O_RDONLY);
+      if (fd == -1)
+        {
+          error (0, errno, "%s", file);
+          return -1;
+        }
+    }
+
+  while ((bytes_read = read (fd, buf, sizeof buf)) > 0)
+    {
+      for (i = 0; i < bytes_read; i++)
+        checksum += buf[i];
+      total_bytes += bytes_read;
+    }
+
+  if (bytes_read < 0)
+    {
+      error (0, errno, "%s", file);
+      if (strcmp (file, "-"))
+        close (fd);
+      return -1;
+    }
+  if (strcmp (file, "-") && close (fd) == -1)
+    {
+      error (0, errno, "%s", file);
+      return -1;
+    }
+
+  /* Fold the low 32 bits of the sum into 16 with end-around carry, as
+     SysV sum does.  (Taking it modulo 0xffff prints 0 for 65535.)  */
+  r = (checksum & 0xffff) + ((checksum & 0xffffffff) >> 16);
+  checksum = (r & 0xffff) + (r >> 16);
+  printf ("%lu %ld", checksum, (total_bytes + 512 - 1) / 512);
+  if (print_name)
+    printf (" %s", file);
+  putchar ('\n');
+  return 0;
+}
diff --git a/src/tac.c b/src/tac.c
new file mode 100644
index 000000000..78e18467b
--- /dev/null
+++ b/src/tac.c
@@ -0,0 +1,628 @@
+/* tac - concatenate and print files in reverse
+ Copyright (C) 1988, 1989, 1990, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Jay Lepreau (lepreau@cs.utah.edu).
+ GNU enhancements by David MacKenzie (djm@ai.mit.edu). */
+
+/* Copy each FILE, or the standard input if none are given or when a
+ FILE name of "-" is encountered, to the standard output with the
+ order of the records reversed. The records are separated by
+ instances of a string, or a newline if none is given. By default, the
+ separator string is attached to the end of the record that it
+ follows in the file.
+
+ Options:
+ -b, --before The separator is attached to the beginning
+ of the record that it precedes in the file.
+ -r, --regex The separator is a regular expression.
+ -s, --separator=separator Use SEPARATOR as the record separator.
+
+ To reverse a file byte by byte, use (in bash, ksh, or sh):
+tac -r -s '.\|
+' file */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <regex.h>
+#include "system.h"
+
+#ifndef STDC_HEADERS
+char *malloc ();
+char *realloc ();
+#endif
+
+/* The number of bytes per atomic read. */
+#define INITIAL_READSIZE 8192
+
+/* The number of bytes per atomic write. */
+#define WRITESIZE 8192
+
+char *mktemp ();
+
+RETSIGTYPE cleanup ();
+int tac ();
+int tac_file ();
+int tac_stdin ();
+char *xmalloc ();
+char *xrealloc ();
+void output ();
+void error ();
+void save_stdin ();
+void xwrite ();
+
+/* The name this program was run with. */
+char *program_name;
+
+/* The string that separates the records of the file. */
+char *separator;
+
+/* If nonzero, print `separator' along with the record preceding it
+ in the file; otherwise with the record following it. */
+int separator_ends_record;
+
+/* 0 if `separator' is to be matched as a regular expression;
+ otherwise, the length of `separator', used as a sentinel to
+ stop the search. */
+int sentinel_length;
+
+/* The length of a match with `separator'. If `sentinel_length' is 0,
+ `match_length' is computed every time a match succeeds;
+ otherwise, it is simply the length of `separator'. */
+int match_length;
+
+/* The input buffer. */
+char *buffer;
+
+/* The number of bytes to read at once into `buffer'. */
+unsigned read_size;
+
+/* The size of `buffer'. This is read_size * 2 + sentinel_length + 2.
+ The extra 2 bytes allow `past_end' to have a value beyond the
+ end of `buffer' and `match_start' to run off the front of `buffer'. */
+unsigned buffer_size;
+
+/* The compiled regular expression representing `separator'. */
+static struct re_pattern_buffer compiled_separator;
+
+struct option longopts[] = /* Long options accepted in addition to "brs:". */
+{
+ {"before", 0, &separator_ends_record, 0}, /* stores 0 in separator_ends_record, as -b does */
+ {"regex", 0, &sentinel_length, 0}, /* stores 0 in sentinel_length, as -r does */
+ {"separator", 1, NULL, 's'}, /* takes an argument and is handled as -s */
+ {NULL, 0, NULL, 0} /* end of table */
+};
+
+void
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ char *error_message; /* Return value from re_compile_pattern. */
+ int optc, errors;
+ int have_read_stdin = 0;
+ /* Set the defaults, parse the options, set up `buffer', then reverse each input. */
+ program_name = argv[0];
+ errors = 0;
+ separator = "\n";
+ sentinel_length = 1;
+ separator_ends_record = 1;
+
+ while ((optc = getopt_long (argc, argv, "brs:", longopts, (int *) 0))
+ != EOF)
+ {
+ switch (optc)
+ {
+ case 0: /* A long option that only stored a value through its flag pointer. */
+ break;
+ case 'b':
+ separator_ends_record = 0;
+ break;
+ case 'r':
+ sentinel_length = 0;
+ break;
+ case 's':
+ separator = optarg;
+ if (*separator == 0)
+ error (1, 0, "separator cannot be empty");
+ break;
+ default:
+ fprintf (stderr, "\
+Usage: %s [-br] [-s separator] [--before] [--regex] [--separator=separator]\n\
+ [file...]\n",
+ program_name);
+ exit (1);
+ }
+ }
+
+ if (sentinel_length == 0) /* Regex mode: compile the separator once. */
+ {
+ compiled_separator.allocated = 100;
+ compiled_separator.buffer = (unsigned char *)
+ xmalloc (compiled_separator.allocated);
+ compiled_separator.fastmap = xmalloc (256);
+ compiled_separator.translate = 0;
+ error_message = re_compile_pattern (separator, strlen (separator),
+ &compiled_separator);
+ if (error_message)
+ error (1, 0, "%s", error_message);
+ }
+ else /* Fixed-string mode: every match has the separator's length. */
+ match_length = sentinel_length = strlen (separator);
+
+ read_size = INITIAL_READSIZE;
+ /* A precaution that will probably never be needed. */
+ while (sentinel_length * 2 >= read_size)
+ read_size *= 2;
+ buffer_size = read_size * 2 + sentinel_length + 2;
+ buffer = xmalloc (buffer_size);
+ if (sentinel_length)
+ {
+ strcpy (buffer, separator); /* Keep a copy of the separator just in front of the data area. */
+ buffer += sentinel_length;
+ }
+ else
+ ++buffer; /* Leave one spare byte in front of the data area. */
+
+ if (optind == argc) /* No file operands: reverse the standard input. */
+ {
+ have_read_stdin = 1;
+ errors = tac_stdin ();
+ }
+ else
+ for (; optind < argc; ++optind)
+ {
+ if (strcmp (argv[optind], "-") == 0)
+ {
+ have_read_stdin = 1;
+ errors |= tac_stdin ();
+ }
+ else
+ errors |= tac_file (argv[optind]);
+ }
+
+ /* Flush the output buffer. */
+ output ((char *) NULL, (char *) NULL);
+
+ if (have_read_stdin && close (0) < 0)
+ error (1, errno, "-");
+ if (close (1) < 0)
+ error (1, errno, "write error");
+ exit (errors);
+}
+
+/* The name of a temporary file containing a copy of pipe input. */
+char *tempfile;
+
+/* Print the standard input in reverse, saving it to temporary
+ file `tempfile' first if it is a pipe.
+ Return 0 if ok, 1 if an error occurs. */
+
+int
+tac_stdin ()
+{
+ /* Previous values of signal handlers. */
+ RETSIGTYPE (*sigint) (), (*sighup) (), (*sigpipe) (), (*sigterm) ();
+ int errors;
+ struct stat stats;
+#ifdef _POSIX_VERSION
+ struct sigaction oldact, newact;
+#endif /* _POSIX_VERSION */
+
+ /* No tempfile is needed for "tac < file".
+ Use fstat instead of checking for errno == ESPIPE because
+ lseek doesn't work on some special files but doesn't return an
+ error, either. */
+ if (fstat (0, &stats))
+ {
+ error (0, errno, "standard input");
+ return 1;
+ }
+ if (S_ISREG (stats.st_mode))
+ return tac (0, "standard input");
+ /* Install `cleanup' for each signal not currently ignored, saving the old handlers. */
+#ifdef _POSIX_VERSION
+ newact.sa_handler = cleanup;
+ sigemptyset (&newact.sa_mask);
+ newact.sa_flags = 0;
+
+ sigaction (SIGINT, NULL, &oldact);
+ sigint = oldact.sa_handler;
+ if (sigint != SIG_IGN)
+ sigaction (SIGINT, &newact, NULL);
+
+ sigaction (SIGHUP, NULL, &oldact);
+ sighup = oldact.sa_handler;
+ if (sighup != SIG_IGN)
+ sigaction (SIGHUP, &newact, NULL);
+
+ sigaction (SIGPIPE, NULL, &oldact);
+ sigpipe = oldact.sa_handler;
+ if (sigpipe != SIG_IGN)
+ sigaction (SIGPIPE, &newact, NULL);
+
+ sigaction (SIGTERM, NULL, &oldact);
+ sigterm = oldact.sa_handler;
+ if (sigterm != SIG_IGN)
+ sigaction (SIGTERM, &newact, NULL);
+#else /* !_POSIX_VERSION */
+ sigint = signal (SIGINT, SIG_IGN); /* signal returns the previous handler. */
+ if (sigint != SIG_IGN)
+ signal (SIGINT, cleanup);
+
+ sighup = signal (SIGHUP, SIG_IGN);
+ if (sighup != SIG_IGN)
+ signal (SIGHUP, cleanup);
+
+ sigpipe = signal (SIGPIPE, SIG_IGN);
+ if (sigpipe != SIG_IGN)
+ signal (SIGPIPE, cleanup);
+
+ sigterm = signal (SIGTERM, SIG_IGN);
+ if (sigterm != SIG_IGN)
+ signal (SIGTERM, cleanup);
+#endif /* _POSIX_VERSION */
+
+ save_stdin (); /* Copy the standard input into `tempfile'. */
+
+ errors = tac_file (tempfile);
+
+ unlink (tempfile);
+ /* Put the saved signal handlers back. */
+#ifdef _POSIX_VERSION
+ newact.sa_handler = sigint;
+ sigaction (SIGINT, &newact, NULL);
+ newact.sa_handler = sighup;
+ sigaction (SIGHUP, &newact, NULL);
+ newact.sa_handler = sigterm;
+ sigaction (SIGTERM, &newact, NULL);
+ newact.sa_handler = sigpipe;
+ sigaction (SIGPIPE, &newact, NULL);
+#else /* !_POSIX_VERSION */
+ signal (SIGINT, sigint);
+ signal (SIGHUP, sighup);
+ signal (SIGTERM, sigterm);
+ signal (SIGPIPE, sigpipe);
+#endif /* _POSIX_VERSION */
+
+ return errors;
+}
+
+/* Make a copy of the standard input in `tempfile'.  The file is created
+   with O_EXCL so a file or symlink planted under that name is not used.  */
+
+void
+save_stdin ()
+{
+  static char *template = NULL;
+  static char *tempdir;
+  int fd, bytes_read;
+
+  if (template == NULL)
+    {
+      tempdir = getenv ("TMPDIR");
+      if (tempdir == NULL)
+        tempdir = "/tmp";
+      template = xmalloc (strlen (tempdir) + 11);	/* "/tacXXXXXX" + NUL */
+    }
+  sprintf (template, "%s/tacXXXXXX", tempdir);
+  tempfile = mktemp (template);
+
+  fd = open (tempfile, O_WRONLY | O_CREAT | O_EXCL, 0600);
+  if (fd == -1)
+    {
+      error (0, errno, "%s", tempfile);
+      cleanup ();
+    }
+  while ((bytes_read = read (0, buffer, read_size)) > 0)
+    if (write (fd, buffer, bytes_read) != bytes_read)
+      {
+        error (0, errno, "%s", tempfile);
+        cleanup ();
+      }
+  if (close (fd) < 0)
+    {
+      error (0, errno, "%s", tempfile);
+      cleanup ();
+    }
+  if (bytes_read == -1)
+    {
+      error (0, errno, "read error");
+      cleanup ();
+    }
+}
+
+/* Open FILE, print it in reverse with `tac', and close it again.
+   Return 0 if ok, 1 if an error occurs.  */
+
+int
+tac_file (file)
+     char *file;
+{
+  int status;
+  int desc = open (file, 0);
+
+  if (desc == -1)
+    {
+      error (0, errno, "%s", file);
+      return 1;
+    }
+  status = tac (desc, file);
+  if (close (desc) >= 0)
+    return status;
+
+  /* The close failed; report that even if `tac' succeeded.  */
+  error (0, errno, "%s", file);
+  return 1;
+}
+
+/* Print in reverse the file open on descriptor FD for reading FILE.
+   The file is read backward, one `read_size' chunk at a time, into
+   `buffer'; each record found between separator matches is handed to
+   `output'.  When `sentinel_length' is zero the separator is searched
+   for with `re_search', otherwise with a plain string comparison.
+   Return 0 if ok, 1 if an error occurs. */
+
+int
+tac (fd, file)
+     int fd;
+     char *file;
+{
+  /* Pointer to the location in `buffer' where the search for
+     the next separator will begin. */
+  char *match_start;
+  /* Pointer to one past the rightmost character in `buffer' that
+     has not been printed yet. */
+  char *past_end;
+  unsigned saved_record_size; /* Length of the record growing in `buffer'. */
+  off_t file_pos; /* Offset in the file of the next read. */
+  /* Nonzero if `output' has not been called yet for any file.
+     Only used when the separator is attached to the preceding record. */
+  int first_time = 1;
+  char first_char = *separator; /* Speed optimization, non-regexp. */
+  char *separator1 = separator + 1; /* Speed optimization, non-regexp. */
+  int match_length1 = match_length - 1; /* Speed optimization, non-regexp. */
+  struct re_registers regs;
+
+  /* Find the size of the input file. */
+  file_pos = lseek (fd, (off_t) 0, SEEK_END);
+  if (file_pos < 1)
+    return 0; /* It's an empty file. */
+
+  /* Arrange for the first read to lop off enough to leave the rest of the
+     file a multiple of `read_size'.  Since `read_size' can change, this may
+     not always hold during the program run, but since it usually will, leave
+     it here for i/o efficiency (page/sector boundaries and all that).
+     Note: the efficiency gain has not been verified. */
+  saved_record_size = file_pos % read_size;
+  if (saved_record_size == 0)
+    saved_record_size = read_size;
+  file_pos -= saved_record_size;
+  /* `file_pos' now points to the start of the last (probably partial) block
+     in the input file. */
+
+  lseek (fd, file_pos, SEEK_SET);
+  if (read (fd, buffer, saved_record_size) != saved_record_size)
+    {
+      /* Pass errno, as the later read check in this function does. */
+      error (0, errno, "%s", file);
+      return 1;
+    }
+
+  match_start = past_end = buffer + saved_record_size;
+  /* For non-regexp search, move past impossible positions for a match. */
+  if (sentinel_length)
+    match_start -= match_length1;
+
+  for (;;)
+    {
+      /* Search backward from `match_start' - 1 to `buffer' for a match
+         with `separator'; for speed, use strncmp if `separator' contains no
+         metacharacters.
+         If the match succeeds, set `match_start' to point to the start of
+         the match and `match_length' to the length of the match.
+         Otherwise, make `match_start' < `buffer'. */
+      if (sentinel_length == 0)
+        {
+          int i = match_start - buffer;
+          int ret;
+
+          ret = re_search (&compiled_separator, buffer, i, i - 1, -i, &regs);
+          if (ret == -1)
+            match_start = buffer - 1;
+          else if (ret == -2)
+            {
+              error (0, 0, "error in regular expression search");
+              cleanup ();
+            }
+          else
+            {
+              match_start = buffer + regs.start[0];
+              match_length = regs.end[0] - regs.start[0];
+            }
+        }
+      else
+        {
+          /* `match_length' is constant for non-regexp boundaries. */
+          /* NOTE(review): this loop has no lower bound of its own; it
+             presumably relies on a copy of the separator stored just
+             before `buffer' -- confirm where `buffer' is set up. */
+          while (*--match_start != first_char
+                 || (match_length1 && strncmp (match_start + 1, separator1,
+                                               match_length1)))
+            /* Do nothing. */ ;
+        }
+
+      /* Check whether we backed off the front of `buffer' without finding
+         a match for `separator'. */
+      if (match_start < buffer)
+        {
+          if (file_pos == 0)
+            {
+              /* Hit the beginning of the file; print the remaining record. */
+              output (buffer, past_end);
+              return 0;
+            }
+
+          saved_record_size = past_end - buffer;
+          if (saved_record_size > read_size)
+            {
+              /* `buffer_size' is about twice `read_size', so since
+                 we want to read in another `read_size' bytes before
+                 the data already in `buffer', we need to increase
+                 `buffer_size'. */
+              char *newbuffer;
+              int offset = sentinel_length ? sentinel_length : 1;
+
+              read_size *= 2;
+              buffer_size = read_size * 2 + sentinel_length + 2;
+              newbuffer = xrealloc (buffer - offset, buffer_size) + offset;
+              /* Adjust the pointers for the new buffer location. */
+              match_start += newbuffer - buffer;
+              past_end += newbuffer - buffer;
+              buffer = newbuffer;
+            }
+
+          /* Back up to the start of the next bufferfull of the file. */
+          if (file_pos >= read_size)
+            file_pos -= read_size;
+          else
+            {
+              read_size = file_pos;
+              file_pos = 0;
+            }
+          lseek (fd, file_pos, SEEK_SET);
+
+          /* Shift the pending record data right to make room for the new. */
+          bcopy (buffer, buffer + read_size, saved_record_size);
+          past_end = buffer + read_size + saved_record_size;
+          /* For non-regexp searches, avoid unnecessary scanning. */
+          if (sentinel_length)
+            match_start = buffer + read_size;
+          else
+            match_start = past_end;
+
+          if (read (fd, buffer, read_size) != read_size)
+            {
+              error (0, errno, "%s", file);
+              return 1;
+            }
+        }
+      else
+        {
+          /* Found a match of `separator'. */
+          if (separator_ends_record)
+            {
+              char *match_end = match_start + match_length;
+
+              /* If this match of `separator' isn't at the end of the
+                 file, print the record. */
+              if (first_time == 0 || match_end != past_end)
+                output (match_end, past_end);
+              past_end = match_end;
+              first_time = 0;
+            }
+          else
+            {
+              output (match_start, past_end);
+              past_end = match_start;
+            }
+          match_start -= match_length - 1;
+        }
+    }
+}
+
+/* Queue the characters from START to PAST_END - 1 for standard output.
+   Data is collected in a private WRITESIZE-byte buffer and written with
+   `xwrite' each time that buffer fills up.
+   If START is NULL, just write out whatever is currently queued. */
+
+void
+output (start, past_end)
+     char *start;
+     char *past_end;
+{
+  static char pending[WRITESIZE];
+  static int n_pending = 0;
+  int remaining;
+  int room;
+
+  if (start == 0)
+    {
+      /* Flush request. */
+      xwrite (1, pending, n_pending);
+      n_pending = 0;
+      return;
+    }
+
+  remaining = past_end - start;
+
+  /* Top up and write out the buffer for as long as the new data can
+     fill it completely. */
+  for (room = WRITESIZE - n_pending; remaining >= room; room = WRITESIZE)
+    {
+      bcopy (start, pending + n_pending, room);
+      remaining -= room;
+      start += room;
+      xwrite (1, pending, WRITESIZE);
+      n_pending = 0;
+    }
+
+  /* Keep the leftover for a later call. */
+  bcopy (start, pending + n_pending, remaining);
+  n_pending += remaining;
+}
+
+/* Remove the file named by `tempfile' and exit with status 1.
+   NOTE(review): the RETSIGTYPE return type suggests this may also be
+   installed as a signal handler -- confirm against the signal setup. */
+RETSIGTYPE
+cleanup ()
+{
+ unlink (tempfile);
+ exit (1);
+}
+
+/* Write SIZE bytes from BUFFER to descriptor DESC.
+   If `write' does not report exactly SIZE bytes written, print a
+   diagnostic and call `cleanup'. */
+void
+xwrite (desc, buffer, size)
+     int desc;
+     char *buffer;
+     int size;
+{
+  if (write (desc, buffer, size) == size)
+    return;
+
+  error (0, errno, "write error");
+  cleanup ();
+}
+
+/* Return a pointer to N bytes of newly allocated memory.
+   If the allocation fails, print a diagnostic and call `cleanup'. */
+
+char *
+xmalloc (n)
+     unsigned n;
+{
+  char *block = malloc (n);
+
+  if (block != 0)
+    return block;
+
+  error (0, 0, "virtual memory exhausted");
+  cleanup ();
+  return block;
+}
+
+/* Resize memory area P to N bytes and return its new address.
+   If the reallocation fails, print a diagnostic and call `cleanup'. */
+
+char *
+xrealloc (p, n)
+     char *p;
+     unsigned n;
+{
+  char *resized = realloc (p, n);
+
+  if (resized != 0)
+    return resized;
+
+  error (0, 0, "virtual memory exhausted");
+  cleanup ();
+  return resized;
+}
diff --git a/src/tail.c b/src/tail.c
new file mode 100644
index 000000000..050c1936f
--- /dev/null
+++ b/src/tail.c
@@ -0,0 +1,858 @@
+/* tail -- output last part of file(s)
+ Copyright (C) 1989, 1990, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Can display any amount of data, unlike the Unix version, which uses
+ a fixed size buffer and therefore can only deliver a limited number
+ of lines.
+
+ Options:
+ -b Tail by N 512-byte blocks.
+ -c, --bytes=N[bkm] Tail by N bytes
+ [or 512-byte blocks, kilobytes, or megabytes].
+ -f, --follow Loop forever trying to read more characters at the
+ end of the file, on the assumption that the file
+ is growing. Ignored if reading from a pipe.
+ Cannot be used if more than one file is given.
+ -k Tail by N kilobytes.
+ -N, -l, -n, --lines=N Tail by N lines.
+ -m Tail by N megabytes.
+ -q, --quiet, --silent Never print filename headers.
+ -v, --verbose Always print filename headers.
+
+ If a number (N) starts with a `+', begin printing with the Nth item
+ from the start of each file, instead of from the end.
+
+ Reads from standard input if no files are given or when a filename of
+ ``-'' is encountered.
+ By default, filename headers are printed only if more than one file
+ is given.
+ By default, prints the last 10 lines (tail -n 10).
+
+ Original version by Paul Rubin <phr@ocf.berkeley.edu>.
+ Extensions by David MacKenzie <djm@ai.mit.edu>. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <sys/types.h>
+#include "system.h"
+
+/* Nonzero if C is one of the characters '0' through '9'.
+   A single unsigned range test is used instead of <ctype.h>'s isdigit:
+   it evaluates C only once and is well defined for every argument,
+   including negative `char' values.  It matches the `- '0'' digit
+   arithmetic used when numbers are parsed in this file. */
+#define ISDIGIT(c) ((unsigned) (c) - '0' <= 9)
+
+/* Number of items to tail. */
+#define DEFAULT_NUMBER 10
+
+/* Size of atomic reads. */
+#define BUFSIZE (512 * 8)
+
+/* Number of bytes per item we are printing.
+ If 0, tail in lines. */
+int unit_size;
+
+/* If nonzero, read from end of file until killed. */
+int forever;
+
+/* If nonzero, count from start of file instead of end. */
+int from_start;
+
+/* If nonzero, print filename headers. */
+int print_headers;
+
+/* When to print the filename banners. */
+enum header_mode
+{
+ multiple_files, always, never
+};
+
+char *xmalloc ();
+int file_lines ();
+int pipe_bytes ();
+int pipe_lines ();
+int start_bytes ();
+int start_lines ();
+int tail ();
+int tail_bytes ();
+int tail_file ();
+int tail_lines ();
+long atou();
+void dump_remainder ();
+void error ();
+void parse_unit ();
+void usage ();
+void write_header ();
+void xwrite ();
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Nonzero if we have ever read standard input. */
+int have_read_stdin;
+
+/* Long option table passed to getopt_long in `main'.
+   The second field is 1 for the options given an argument here
+   (`bytes', `lines') and 0 for the others; the last field is the
+   option character `main' switches on.  `quiet' and `silent' share 'q'.
+   The all-zero entry ends the table. */
+struct option long_options[] =
+{
+ {"bytes", 1, NULL, 'c'},
+ {"follow", 0, NULL, 'f'},
+ {"lines", 1, NULL, 'n'},
+ {"quiet", 0, NULL, 'q'},
+ {"silent", 0, NULL, 'q'},
+ {"verbose", 0, NULL, 'v'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Parse the command line, then call `tail_file' on every file operand,
+   or on "-" (standard input) when there are none.
+   The first argument may use the old `-N...' / `+N...' syntax; it is
+   handled by hand and then hidden from getopt_long.
+   The exit status is the OR of the `tail_file' results. */
+void
+main (argc, argv)
+ int argc;
+ char **argv;
+{
+ enum header_mode header_mode = multiple_files;
+ int exit_status = 0;
+ /* If from_start, the number of items to skip before printing; otherwise,
+ the number of items at the end of the file to print. Initially, -1
+ means the value has not been set. */
+ long number = -1;
+ int c; /* Option character. */
+
+ program_name = argv[0];
+ have_read_stdin = 0;
+ unit_size = 0;
+ forever = from_start = print_headers = 0;
+
+ if (argc > 1
+ && ((argv[1][0] == '-' && ISDIGIT (argv[1][1]))
+ || (argv[1][0] == '+' && (ISDIGIT (argv[1][1]) || argv[1][1] == 0))))
+ {
+ /* Old option syntax: a dash or plus, one or more digits (zero digits
+ are acceptable with a plus), and one or more option letters. */
+ if (argv[1][0] == '+')
+ from_start = 1;
+ if (argv[1][1] != 0)
+ {
+ /* Accumulate the decimal count; argv[1] itself is advanced past
+ each character as it is consumed. */
+ for (number = 0, ++argv[1]; ISDIGIT (*argv[1]); ++argv[1])
+ number = number * 10 + *argv[1] - '0';
+ /* Parse any appended option letters. */
+ while (*argv[1])
+ {
+ switch (*argv[1])
+ {
+ case 'b':
+ unit_size = 512;
+ break;
+
+ case 'c':
+ unit_size = 1;
+ break;
+
+ case 'f':
+ forever = 1;
+ break;
+
+ case 'k':
+ unit_size = 1024;
+ break;
+
+ case 'l':
+ unit_size = 0;
+ break;
+
+ case 'm':
+ unit_size = 1048576;
+ break;
+
+ case 'q':
+ header_mode = never;
+ break;
+
+ case 'v':
+ header_mode = always;
+ break;
+
+ default:
+ error (0, 0, "unrecognized option `-%c'", *argv[1]);
+ usage ();
+ }
+ ++argv[1];
+ }
+ }
+ /* Make the options we just parsed invisible to getopt. */
+ argv[1] = argv[0];
+ argv++;
+ argc--;
+ }
+
+ while ((c = getopt_long (argc, argv, "c:n:fqv", long_options, (int *) 0))
+ != EOF)
+ {
+ switch (c)
+ {
+ case 'c':
+ /* Count in bytes unless parse_unit finds a b, k or m suffix;
+ the number itself is parsed by the code shared with -n. */
+ unit_size = 1;
+ parse_unit (optarg);
+ goto getnum;
+ case 'n':
+ unit_size = 0;
+ getnum:
+ /* A leading `+' selects counting from the start of the file;
+ a leading `-' is accepted and skipped. */
+ if (*optarg == '+')
+ {
+ from_start = 1;
+ ++optarg;
+ }
+ else if (*optarg == '-')
+ ++optarg;
+ number = atou (optarg);
+ if (number == -1)
+ error (1, 0, "invalid number `%s'", optarg);
+ break;
+
+ case 'f':
+ forever = 1;
+ break;
+
+ case 'q':
+ header_mode = never;
+ break;
+
+ case 'v':
+ header_mode = always;
+ break;
+
+ default:
+ usage ();
+ }
+ }
+
+ if (number == -1)
+ number = DEFAULT_NUMBER;
+
+ /* To start printing with item `number' from the start of the file, skip
+ `number' - 1 items. `tail +0' is actually meaningless, but for Unix
+ compatibility it's treated the same as `tail +1'. */
+ if (from_start)
+ {
+ if (number)
+ --number;
+ }
+
+ /* From here on `number' is a byte count whenever unit_size is nonzero. */
+ if (unit_size > 1)
+ number *= unit_size;
+
+ if (optind < argc - 1 && forever)
+ error (1, 0, "cannot follow the ends of multiple files");
+
+ if (header_mode == always
+ || (header_mode == multiple_files && optind < argc - 1))
+ print_headers = 1;
+
+ /* No file operands: read standard input. */
+ if (optind == argc)
+ exit_status |= tail_file ("-", number);
+
+ for (; optind < argc; ++optind)
+ exit_status |= tail_file (argv[optind], number);
+
+ if (have_read_stdin && close (0) < 0)
+ error (1, errno, "-");
+ if (close (1) < 0)
+ error (1, errno, "write error");
+ exit (exit_status);
+}
+
+/* Run `tail' on file FILENAME with item count NUMBER.
+   A FILENAME of "-" selects the standard input (descriptor 0), which
+   is reported under the name "standard input" and is not closed here.
+   A filename header is written first when `print_headers' is set.
+   Return 0 if successful, 1 if an error occurred. */
+
+int
+tail_file (filename, number)
+     char *filename;
+     long number;
+{
+  int desc;
+  int status;
+
+  if (strcmp (filename, "-") == 0)
+    {
+      have_read_stdin = 1;
+      filename = "standard input";
+      if (print_headers)
+        write_header (filename);
+      return tail (filename, 0, number);
+    }
+
+  desc = open (filename, O_RDONLY);
+  if (desc < 0)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  if (print_headers)
+    write_header (filename);
+  status = tail (filename, desc, number);
+  if (close (desc) != 0)
+    {
+      /* A failed close counts as an error for this file. */
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+  return status;
+}
+
+/* Write the banner `==> FILENAME <==' followed by a newline to
+   standard output.  Every banner after the first one written by this
+   function is preceded by an extra newline. */
+void
+write_header (filename)
+     char *filename;
+{
+  static int first_file = 1;
+  char *opening = first_file ? "==> " : "\n==> ";
+
+  first_file = 0;
+  xwrite (1, opening, strlen (opening));
+  xwrite (1, filename, strlen (filename));
+  xwrite (1, " <==\n", 5);
+}
+
+/* Display the last NUMBER units of file FILENAME, open for reading
+   in FD.  A nonzero `unit_size' selects tailing by bytes, zero selects
+   tailing by lines.
+   Return 0 if successful, 1 if an error occurred. */
+
+int
+tail (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  return unit_size
+    ? tail_bytes (filename, fd, number)
+    : tail_lines (filename, fd, number);
+}
+
+/* Display the last part of file FILENAME, open for reading in FD,
+   using NUMBER characters.  When `from_start' is set, NUMBER
+   characters are skipped at the start instead.
+   Regular files are positioned with lseek; anything else is read
+   sequentially through `start_bytes' or `pipe_bytes'.
+   Return 0 if successful, 1 if an error occurred. */
+
+int
+tail_bytes (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  struct stat stats;
+
+  /* Use fstat instead of checking for errno == ESPIPE because
+     lseek doesn't work on some special files but doesn't return an
+     error, either. */
+  if (fstat (fd, &stats) != 0)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  if (!S_ISREG (stats.st_mode))
+    {
+      /* Not a regular file: the data has to be read to be skipped. */
+      if (!from_start)
+        return pipe_bytes (filename, fd, number);
+      if (start_bytes (filename, fd, number))
+        return 1;
+    }
+  else if (from_start)
+    lseek (fd, number, SEEK_SET);
+  else if (lseek (fd, 0L, SEEK_END) <= number)
+    /* The file is shorter than we want, or just the right size, so
+       print the whole file. */
+    lseek (fd, 0L, SEEK_SET);
+  else
+    /* The file is longer than we want, so go back. */
+    lseek (fd, -number, SEEK_END);
+
+  dump_remainder (filename, fd);
+  return 0;
+}
+
+/* Display the last part of file FILENAME, open for reading on FD,
+   using NUMBER lines.  When `from_start' is set, NUMBER lines are
+   skipped at the start instead.
+   Return 0 if successful, 1 if an error occurred. */
+
+int
+tail_lines (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  struct stat stats;
+  long length;
+
+  if (fstat (fd, &stats) != 0)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  if (from_start)
+    {
+      if (start_lines (filename, fd, number))
+        return 1;
+    }
+  else if (!S_ISREG (stats.st_mode))
+    return pipe_lines (filename, fd, number);
+  else
+    {
+      /* Regular file: search backward from the end; an empty file
+         needs no search. */
+      length = lseek (fd, 0L, SEEK_END);
+      if (length != 0 && file_lines (filename, fd, number, length))
+        return 1;
+    }
+
+  dump_remainder (filename, fd);
+  return 0;
+}
+
+/* Find where the last NUMBER lines of file FD begin.
+   Read backward through the file, `BUFSIZE' bytes at a time (except
+   probably for the first read), until NUMBER newlines have been counted
+   or the start of the file is reached.  The part of the final chunk
+   that follows the located newline is printed here; the file offset is
+   left just after that chunk, or at offset 0 when the file has too few
+   lines.
+   POS starts out as the length of the file (the offset of the last
+   byte of the file + 1).
+   Return 0 if successful, 1 if an error occurred. */
+
+int
+file_lines (filename, fd, number, pos)
+     char *filename;
+     int fd;
+     long number;
+     long pos;
+{
+  char buffer[BUFSIZE];
+  int nread;
+  int idx; /* Index into `buffer' for scanning. */
+
+  if (number == 0)
+    return 0;
+
+  /* The first chunk is the last, probably partial, buffer of the file;
+     its size is in the range 1..BUFSIZE.  After it, `pos' is a multiple
+     of `BUFSIZE', so later reads fall on block boundaries. */
+  nread = pos % BUFSIZE;
+  if (nread == 0)
+    nread = BUFSIZE;
+  pos -= nread;
+  lseek (fd, pos, SEEK_SET);
+  nread = read (fd, buffer, nread);
+  if (nread == -1)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  /* Count the incomplete line on files that don't end with a newline. */
+  if (nread != 0 && buffer[nread - 1] != '\n')
+    --number;
+
+  for (;;)
+    {
+      /* Scan this bufferfull backward, counting newlines. */
+      idx = nread;
+      while (idx-- > 0)
+        {
+          int after;
+
+          if (buffer[idx] != '\n')
+            continue;
+          /* Have we counted the requested number of newlines yet? */
+          if (number-- != 0)
+            continue;
+          /* Print whatever follows this newline in the buffer. */
+          after = nread - (idx + 1);
+          if (after != 0)
+            xwrite (1, &buffer[idx + 1], after);
+          return 0;
+        }
+
+      /* Not enough newlines in that bufferfull. */
+      if (pos == 0)
+        {
+          /* Not enough lines in the file; print the entire file. */
+          lseek (fd, 0L, SEEK_SET);
+          return 0;
+        }
+      pos -= BUFSIZE;
+      lseek (fd, pos, SEEK_SET);
+      nread = read (fd, buffer, BUFSIZE);
+      if (nread <= 0)
+        break;
+    }
+
+  if (nread == -1)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+  return 0;
+}
+
+/* Print the last NUMBER lines from the end of the standard input,
+   open for reading as pipe FD.
+   Buffer the text as a linked list of LBUFFERs, adding them as needed.
+   Nothing is printed when NUMBER is 0 or the input is empty.
+   Return 0 if successful, 1 if an error occurred. */
+
+int
+pipe_lines (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  struct linebuffer
+  {
+    int nbytes, nlines;
+    char buffer[BUFSIZE];
+    struct linebuffer *next;
+  };
+  typedef struct linebuffer LBUFFER;
+  LBUFFER *first, *last, *tmp;
+  int i; /* Index into buffers. */
+  int total_lines = 0; /* Total number of newlines in all buffers. */
+  int errors = 0;
+
+  first = last = (LBUFFER *) xmalloc (sizeof (LBUFFER));
+  first->nbytes = first->nlines = 0;
+  first->next = NULL;
+  tmp = (LBUFFER *) xmalloc (sizeof (LBUFFER));
+
+  /* Input is always read into a fresh buffer. */
+  while ((tmp->nbytes = read (fd, tmp->buffer, BUFSIZE)) > 0)
+    {
+      tmp->nlines = 0;
+      tmp->next = NULL;
+
+      /* Count the number of newlines just read. */
+      for (i = 0; i < tmp->nbytes; i++)
+        if (tmp->buffer[i] == '\n')
+          ++tmp->nlines;
+      total_lines += tmp->nlines;
+
+      /* If there is enough room in the last buffer read, just append the new
+         one to it.  This is because when reading from a pipe, `nbytes' can
+         often be very small. */
+      if (tmp->nbytes + last->nbytes < BUFSIZE)
+        {
+          bcopy (tmp->buffer, &last->buffer[last->nbytes], tmp->nbytes);
+          last->nbytes += tmp->nbytes;
+          last->nlines += tmp->nlines;
+        }
+      else
+        {
+          /* If there's not enough room, link the new buffer onto the end of
+             the list, then either free up the oldest buffer for the next
+             read if that would leave enough lines, or else malloc a new one.
+             Some compaction mechanism is possible but probably not
+             worthwhile. */
+          last = last->next = tmp;
+          if (total_lines - first->nlines > number)
+            {
+              tmp = first;
+              total_lines -= first->nlines;
+              first = first->next;
+            }
+          else
+            tmp = (LBUFFER *) xmalloc (sizeof (LBUFFER));
+        }
+    }
+  if (tmp->nbytes == -1)
+    {
+      error (0, errno, "%s", filename);
+      errors = 1;
+      free ((char *) tmp);
+      goto free_lbuffers;
+    }
+
+  free ((char *) tmp);
+
+  /* This prevents a core dump when the pipe contains no newlines. */
+  if (number == 0)
+    goto free_lbuffers;
+
+  /* `last' holds no data only when nothing at all was read.  Stop here
+     so that the check below never looks at `last->buffer[-1]'. */
+  if (last->nbytes == 0)
+    goto free_lbuffers;
+
+  /* Count the incomplete line on files that don't end with a newline. */
+  if (last->buffer[last->nbytes - 1] != '\n')
+    {
+      ++last->nlines;
+      ++total_lines;
+    }
+
+  /* Run through the list, printing lines.  First, skip over unneeded
+     buffers. */
+  for (tmp = first; total_lines - tmp->nlines > number; tmp = tmp->next)
+    total_lines -= tmp->nlines;
+
+  /* Find the correct beginning, then print the rest of the file. */
+  if (total_lines > number)
+    {
+      char *cp;
+
+      /* Skip `total_lines' - `number' newlines.  We made sure that
+         `total_lines' - `number' <= `tmp->nlines'. */
+      cp = tmp->buffer;
+      for (i = total_lines - number; i; --i)
+        while (*cp++ != '\n')
+          /* Do nothing. */ ;
+      i = cp - tmp->buffer;
+    }
+  else
+    i = 0;
+  xwrite (1, &tmp->buffer[i], tmp->nbytes - i);
+
+  for (tmp = tmp->next; tmp; tmp = tmp->next)
+    xwrite (1, tmp->buffer, tmp->nbytes);
+
+free_lbuffers:
+  while (first)
+    {
+      tmp = first->next;
+      free ((char *) first);
+      first = tmp;
+    }
+  return errors;
+}
+
+/* Print the last NUMBER characters from the end of pipe FD.
+   The input is kept in a linked list of CBUFFERs; the oldest buffer is
+   recycled for the next read whenever the buffers after it already
+   hold more than NUMBER characters.
+   Return 0 if successful, 1 if an error occurred. */
+
+int
+pipe_bytes (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  struct charbuffer
+  {
+    int nbytes;
+    char buffer[BUFSIZE];
+    struct charbuffer *next;
+  };
+  typedef struct charbuffer CBUFFER;
+  CBUFFER *head, *newest, *cur;
+  int skip; /* Characters to drop from the first printed buffer. */
+  int total_bytes = 0; /* Total characters in all buffers. */
+  int errors = 0;
+
+  head = (CBUFFER *) xmalloc (sizeof (CBUFFER));
+  head->nbytes = 0;
+  head->next = NULL;
+  newest = head;
+  cur = (CBUFFER *) xmalloc (sizeof (CBUFFER));
+
+  /* Input is always read into a buffer that is not yet on the list. */
+  for (;;)
+    {
+      cur->nbytes = read (fd, cur->buffer, BUFSIZE);
+      if (cur->nbytes <= 0)
+        break;
+      cur->next = NULL;
+      total_bytes += cur->nbytes;
+
+      /* Reads from a pipe are often short; when the new data fits in
+         the newest listed buffer, copy it there and reuse `cur'. */
+      if (cur->nbytes + newest->nbytes < BUFSIZE)
+        {
+          bcopy (cur->buffer, &newest->buffer[newest->nbytes], cur->nbytes);
+          newest->nbytes += cur->nbytes;
+          continue;
+        }
+
+      /* Otherwise link `cur' onto the end of the list.  Then take the
+         oldest buffer off the front for the next read if enough
+         characters remain without it, or else allocate a new one. */
+      newest->next = cur;
+      newest = cur;
+      if (total_bytes - head->nbytes > number)
+        {
+          cur = head;
+          total_bytes -= head->nbytes;
+          head = head->next;
+        }
+      else
+        cur = (CBUFFER *) xmalloc (sizeof (CBUFFER));
+    }
+
+  if (cur->nbytes == -1)
+    {
+      error (0, errno, "%s", filename);
+      errors = 1;
+    }
+  free ((char *) cur);
+
+  if (!errors)
+    {
+      /* Skip whole buffers that are not needed. */
+      for (cur = head; total_bytes - cur->nbytes > number; cur = cur->next)
+        total_bytes -= cur->nbytes;
+
+      /* The loop above made sure that `total_bytes' - `number' is at
+         most `cur->nbytes', so the start lies inside `cur'. */
+      skip = total_bytes > number ? total_bytes - number : 0;
+      xwrite (1, &cur->buffer[skip], cur->nbytes - skip);
+
+      while ((cur = cur->next) != NULL)
+        xwrite (1, cur->buffer, cur->nbytes);
+    }
+
+  /* Release the whole list. */
+  while (head != NULL)
+    {
+      cur = head;
+      head = head->next;
+      free ((char *) cur);
+    }
+  return errors;
+}
+
+/* Read and discard NUMBER characters from the start of pipe FD.
+   The final read may go past that point; the surplus characters from
+   that read are printed.
+   Return 1 on error, 0 if ok. */
+
+int
+start_bytes (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  char buffer[BUFSIZE];
+  int nread = 0;
+
+  while (number > 0)
+    {
+      nread = read (fd, buffer, BUFSIZE);
+      if (nread <= 0)
+        break;
+      number -= nread;
+    }
+
+  if (nread == -1)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  /* A negative NUMBER is minus the count of surplus characters, which
+     sit at the end of the last buffer read. */
+  if (number < 0)
+    xwrite (1, &buffer[nread + number], -number);
+  return 0;
+}
+
+/* Read and discard NUMBER lines from the start of file or pipe FD.
+   The final read may go past the last skipped newline; the characters
+   after it in that buffer are printed.
+   Return 1 on error, 0 if ok. */
+
+int
+start_lines (filename, fd, number)
+     char *filename;
+     int fd;
+     long number;
+{
+  char buffer[BUFSIZE];
+  int nread = 0;
+  int skip = 0; /* Characters of `buffer' already skipped. */
+
+  while (number != 0)
+    {
+      nread = read (fd, buffer, BUFSIZE);
+      if (nread <= 0)
+        break;
+
+      /* Step through the buffer until the last wanted newline. */
+      for (skip = 0; skip < nread;)
+        {
+          char ch = buffer[skip++];
+
+          if (ch == '\n' && --number == 0)
+            break;
+        }
+    }
+
+  if (nread == -1)
+    {
+      error (0, errno, "%s", filename);
+      return 1;
+    }
+
+  if (skip < nread)
+    xwrite (1, &buffer[skip], nread - skip);
+  return 0;
+}
+
+/* Copy file FILENAME to standard output, from the current position
+   in FD up to the end.  If `forever' is nonzero, sleep one second after
+   reaching the end and then try to read more, repeating until killed. */
+
+void
+dump_remainder (filename, fd)
+     char *filename;
+     int fd;
+{
+  char buffer[BUFSIZE];
+  int nread;
+
+  for (;;)
+    {
+      while ((nread = read (fd, buffer, BUFSIZE)) > 0)
+        xwrite (1, buffer, nread);
+      if (nread == -1)
+        error (1, errno, "%s", filename);
+      if (!forever)
+        break;
+      sleep (1);
+    }
+}
+
+/* If STR ends in a unit letter, set `unit_size' to the matching
+   multiplier -- `b' 512, `k' 1024, `m' 1048576 -- and remove that
+   letter from STR.  Otherwise leave both STR and `unit_size' alone. */
+void
+parse_unit (str)
+     char *str;
+{
+  int arglen = strlen (str);
+  char *suffix;
+  int multiplier;
+
+  if (arglen == 0)
+    return;
+
+  suffix = &str[arglen - 1];
+  if (*suffix == 'b')
+    multiplier = 512;
+  else if (*suffix == 'k')
+    multiplier = 1024;
+  else if (*suffix == 'm')
+    multiplier = 1048576;
+  else
+    return;
+
+  unit_size = multiplier;
+  *suffix = '\0';
+}
+
+/* Convert STR, a string of ASCII digits, into an unsigned integer.
+   Return -1 if STR contains a character that is not a digit, or if
+   the number does not fit in the nonnegative range of a `long'.
+   An empty STR converts to 0. */
+
+long
+atou (str)
+     char *str;
+{
+  /* Largest value a `long' result can hold, computed here so that no
+     extra header is needed.  */
+  unsigned long limit = ~(unsigned long) 0 >> 1;
+  unsigned long value = 0;
+
+  for (; ISDIGIT (*str); ++str)
+    {
+      unsigned long digit = *str - '0';
+
+      /* Refuse the digit if `value * 10 + digit' would pass `limit';
+         otherwise the result would wrap around silently. */
+      if (value > (limit - digit) / 10)
+        return -1;
+      value = value * 10 + digit;
+    }
+  return *str ? -1 : (long) value;
+}
+
+/* Print a usage summary on standard error, with `program_name' filled
+   in for both synopsis forms, then exit with status 1. */
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-c [+]N[bkm]] [-n [+]N] [-fqv] [--bytes=[+]N[bkm]] [--lines=[+]N]\n\
+ [--follow] [--quiet] [--silent] [--verbose] [file...]\n\
+ %s [{-,+}Nbcfklmqv] [file...]\n", program_name, program_name);
+ exit (1);
+}
diff --git a/src/tr.c b/src/tr.c
new file mode 100644
index 000000000..bd12f383f
--- /dev/null
+++ b/src/tr.c
@@ -0,0 +1,1813 @@
+/* tr -- a filter to translate characters
+ Copyright (C) 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Jim Meyering. */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifndef isgraph
+#define isgraph(c) (isprint (c) && !isspace (c))
+#endif
+#include <stdio.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include "getopt.h"
+#include "system.h"
+
+#ifndef LONG_MAX
+#define LONG_MAX 0x7FFFFFFF
+#endif
+
+#ifndef UCHAR_MAX
+#define UCHAR_MAX 0xFF
+#endif
+
+#define N_CHARS (UCHAR_MAX + 1)
+
+/* A pointer to a function that returns an int. */
+typedef int (*PFI) ();
+
+/* Convert from character C to its index in the collating
+   sequence array.  This is just a cast to unsigned int.  The whole
+   expansion is parenthesized so that the macro behaves as a single
+   operand next to `sizeof' or a postfix operator.  */
+#define ORD(c) ((unsigned int) (c))
+
+/* The inverse of ORD: reduce index I to an unsigned char.  */
+#define CHR(i) ((unsigned char) (i))
+
+/* The value for Spec_list->state that indicates to
+   get_next that it should initialize the tail pointer.
+   Its value doesn't matter as long as it can't be
+   confused with a valid character code.  */
+#define BEGIN_STATE (2 * N_CHARS)
+
+/* The value for Spec_list->state that indicates to
+   get_next that the element pointed to by Spec_list->tail is
+   being considered for the first time on this pass through the
+   list -- it indicates that get_next should make any necessary
+   initializations.  */
+#define NEW_ELEMENT (BEGIN_STATE + 1)
+
+/* A value distinct from any character that may have been stored in a
+   buffer as the result of a block-read in the function squeeze_filter.  */
+#define NOT_A_CHAR ((unsigned int) (-1))
+
+/* The following (but not CC_NO_CLASS) are indices into the array of
+ valid character class strings. The numeric values must stay in
+ step with the order of the names in char_class_name, because
+ look_up_char_class returns an array index cast to this type.
+ CC_NO_CLASS is what look_up_char_class returns for a name that is
+ not in the array. */
+enum Char_class
+{
+ CC_ALNUM = 0, CC_ALPHA = 1, CC_BLANK = 2, CC_CNTRL = 3,
+ CC_DIGIT = 4, CC_GRAPH = 5, CC_LOWER = 6, CC_PRINT = 7,
+ CC_PUNCT = 8, CC_SPACE = 9, CC_UPPER = 10, CC_XDIGIT = 11,
+ CC_NO_CLASS = 9999
+};
+
+/* Character class to which a character (returned by get_next) belonged;
+ but it is set only if the construct from which the character was obtained
+ was one of the character classes [:upper:] or [:lower:]. The value
+ is used only when translating and then, only to make sure that upper
+ and lower class constructs have the same relative positions in string1
+ and string2. The numeric values double as indices into class_ok. */
+enum Upper_Lower_class
+{
+ /* The character came from a [:lower:] construct. */
+ UL_LOWER = 0,
+ /* The character came from an [:upper:] construct. */
+ UL_UPPER = 1,
+ /* The character came from any other kind of construct. */
+ UL_NONE = 2
+};
+
+/* A shortcut to ensure that when constructing the translation array,
+ one of the values returned by paired calls to get_next (from s1 and s2) is
+ from [:upper:] and the other is from [:lower:], or neither is
+ from upper or lower. In fact, no other character classes are allowed
+ when translating, but that condition is tested elsewhere. This array
+ is indexed by values of type enum Upper_Lower_class. Rows and
+ columns both run UL_LOWER, UL_UPPER, UL_NONE, and the table is
+ symmetric, so the order of the two indices does not matter. */
+static int class_ok[3][3] =
+{
+ {0, 1, 0}, /* UL_LOWER: acceptable only opposite UL_UPPER */
+ {1, 0, 0}, /* UL_UPPER: acceptable only opposite UL_LOWER */
+ {0, 0, 1} /* UL_NONE: acceptable only opposite UL_NONE */
+};
+
+/* The type of a List_element. See build_spec_list for more details.
+ Each value other than RE_NO_TYPE is stored by exactly one of the
+ append_* functions. */
+enum Range_element_type
+{
+ /* No append_* function stores this value; get_next aborts and
+ get_spec_stats fails an assertion if they meet it. */
+ RE_NO_TYPE = 0,
+ /* A single character standing for itself (append_normal_char). */
+ RE_NORMAL_CHAR,
+ /* A range r-s (append_range). */
+ RE_RANGE,
+ /* A character class [:str:] (append_char_class). */
+ RE_CHAR_CLASS,
+ /* An equivalence class [=c=] (append_equiv_class). */
+ RE_EQUIV_CLASS,
+ /* A repeated character [c*n] or [c*] (append_repeated_char). */
+ RE_REPEATED_CHAR
+};
+
+/* One construct in one of tr's argument strings.
+ For example, consider the POSIX version of the
+ classic tr command:
+ tr -cs 'a-zA-Z_' '[\n*]'
+ String1 has 3 constructs, two of which are ranges (a-z and A-Z),
+ and a single normal character, `_'. String2 has one construct.
+ The member `type' tells which member of the union `u' is valid. */
+struct List_element
+{
+ enum Range_element_type type;
+ /* The following construct in the same string, or NULL at the end. */
+ struct List_element *next;
+ union
+ {
+ /* Valid when type is RE_NORMAL_CHAR. */
+ int normal_char;
+ /* Valid when type is RE_RANGE. append_range stores a range only
+ when first_char does not exceed last_char. */
+ struct /* unnamed */
+ {
+ unsigned int first_char;
+ unsigned int last_char;
+ } range;
+ /* Valid when type is RE_CHAR_CLASS. */
+ enum Char_class char_class;
+ /* Valid when type is RE_EQUIV_CLASS: the single character given
+ between `[=' and `=]'. */
+ int equiv_code;
+ /* Valid when type is RE_REPEATED_CHAR. A repeat_count of 0
+ stands for a [c*] or [c*0] construct. */
+ struct /* unnamed */
+ {
+ unsigned int the_repeated_char;
+ long repeat_count;
+ } repeated_char;
+ } u;
+};
+
+/* Each of tr's argument strings is parsed into a form that is easier
+ to work with: a linked list of constructs (struct List_element).
+ Each Spec_list structure also encapsulates various attributes of
+ the corresponding argument string. The attributes are used mainly
+ to verify that the strings are legal in the context of any options
+ specified (like -s, -d, or -c). The main exception is the member
+ `tail', which is first used to construct the list. After construction,
+ it is used by get_next to save its state when traversing the list.
+ The member `state' serves a similar function.
+ spec_init sets up only `head' and `tail'; the members from `length'
+ onward are filled in by get_spec_stats. */
+struct Spec_list
+{
+ /* Points to the head of the list of range elements.
+ The first struct is a dummy; its members are never used. */
+ struct List_element *head;
+
+ /* When appending, points to the last element. When traversing via
+ get_next(), points to the element to process next. Setting
+ Spec_list.state to the value BEGIN_STATE before calling get_next
+ signals get_next to initialize tail to point to head->next. */
+ struct List_element *tail;
+
+ /* Used to save state between calls to get_next(). Holds BEGIN_STATE,
+ NEW_ELEMENT, or a position within the element `tail' points to. */
+ unsigned int state;
+
+ /* Length, in the sense that length('a-z[:digit:]123abc')
+ is 42 ( = 26 + 10 + 6). */
+ int length;
+
+ /* The number of [c*] and [c*0] constructs that appear in this spec. */
+ int n_indefinite_repeats;
+
+ /* Non-zero if this spec contains at least one equivalence
+ class construct e.g. [=c=]. */
+ int has_equiv_class;
+
+ /* Non-zero if this spec contains at least one of [:upper:] or
+ [:lower:] class constructs. */
+ int has_upper_or_lower;
+
+ /* Non-zero if this spec contains at least one of the character class
+ constructs (all but upper and lower) that aren't allowed in s2. */
+ int has_restricted_char_class;
+};
+
+char *xmalloc ();
+char *stpcpy ();
+void error ();
+
+/* The name by which this program was run. */
+char *program_name;
+
+/* When non-zero, each sequence in the input of a repeated character
+ (call it c) is replaced (in the output) by a single occurrence of c
+ for every c in the squeeze set. */
+static int squeeze_repeats = 0;
+
+/* When non-zero, removes characters in the delete set from input. */
+static int delete = 0;
+
+/* Use the complement of set1 in place of set1. */
+static int complement = 0;
+
+/* When non-zero, this flag causes GNU tr to provide strict
+ compliance with POSIX draft 1003.2.11.2. The POSIX spec
+ says that when -d is used without -s, string2 (if present)
+ must be ignored. Silently ignoring arguments is a bad idea.
+ The default GNU behavior is to give a usage message and exit.
+ Additionally, when this flag is non-zero, tr prints warnings
+ on stderr if it is being used in a manner that is not portable.
+ Applicable warnings are given by default, but are suppressed
+ if the environment variable `POSIXLY_CORRECT' is set, since
+ being POSIX conformant means we can't issue such messages.
+ Warnings on the following topics are suppressed when this
+ variable is non-zero:
+ 1. Ambiguous octal escapes. */
+static int posix_pedantic;
+
+/* When tr is performing translation and string1 is longer than string2,
+ POSIX says that the result is undefined. That gives the implementor
+ of a POSIX conforming version of tr two reasonable choices for the
+ semantics of this case.
+
+ * The BSD tr pads string2 to the length of string1 by
+ repeating the last character in string2.
+
+ * System V tr ignores characters in string1 that have no
+ corresponding character in string2. That is, string1 is effectively
+ truncated to the length of string2.
+
+ When non-zero, this flag causes GNU tr to imitate the behavior
+ of System V tr when translating with string1 longer than string2.
+ The default is to emulate BSD tr. This flag is ignored in modes where
+ no translation is performed. Emulating the System V tr
+ in this exceptional case causes the relatively common BSD idiom:
+
+ tr -cs A-Za-z0-9 '\012'
+
+ to break (it would convert only zero bytes, rather than all
+ non-alphanumerics, to newlines).
+
+ WARNING: This switch does not provide general BSD or System V
+ compatibility. For example, it doesn't disable the interpretation
+ of the POSIX constructs [:alpha:], [=c=], and [c*10], so if by
+ some unfortunate coincidence you use such constructs in scripts
+ expecting to use some other version of tr, the scripts will break. */
+static int truncate_set1 = 0;
+
+/* An alias for (!delete && non_option_args == 2).
+ It is set in main and used there and in validate(). */
+static int translating;
+
+#ifndef BUFSIZ
+#define BUFSIZ 8192
+#endif
+
+#define IO_BUF_SIZE BUFSIZ
+static unsigned char io_buf[IO_BUF_SIZE];
+
+/* The names accepted between `[:' and `:]'. The position of each name
+ is its enum Char_class value (look_up_char_class returns the index of
+ the match), so the order here must agree with that enum. */
+char *char_class_name[] =
+{
+ "alnum", "alpha", "blank", "cntrl", "digit", "graph",
+ "lower", "print", "punct", "space", "upper", "xdigit"
+};
+/* Number of entries in char_class_name. */
+#define N_CHAR_CLASSES (sizeof(char_class_name) / sizeof(char_class_name[0]))
+
+typedef char SET_TYPE;
+
+/* Array of boolean values. A character `c' is a member of the
+ squeeze set if and only if in_squeeze_set[c] is true. The squeeze
+ set is defined by the last (possibly, the only) string argument
+ on the command line when the squeeze option is given. */
+static SET_TYPE in_squeeze_set[N_CHARS];
+
+/* Array of boolean values. A character `c' is a member of the
+ delete set if and only if in_delete_set[c] is true. The delete
+ set is defined by the first (or only) string argument on the
+ command line when the delete option is given. */
+static SET_TYPE in_delete_set[N_CHARS];
+
+/* Array of character values defining the translation (if any) that
+ tr is to perform. Translation is performed only when there are
+ two specification strings and the delete switch is not given. */
+static char xlate[N_CHARS];
+
+static struct option long_options[] =
+{
+ {"complement", 0, NULL, 'c'},
+ {"delete", 0, NULL, 'd'},
+ {"squeeze-repeats", 0, NULL, 's'},
+ {"truncate-set1", 0, NULL, 't'},
+ {NULL, 0, NULL, 0}
+};
+
+
+/* Print a synopsis of tr's options and operands on standard error,
+ using program_name as the command name, and exit with status 2. */
+static void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-cdst] [--complement] [--delete] [--squeeze-repeats]\n\
+ [--truncate-set1] string1 [string2]\n",
+ program_name);
+ exit (2);
+}
+
+/* Tell whether the character C belongs to the equivalence class that
+   contains the character EQUIV_CLASS.  Every character is treated as
+   the only member of its own class, so this is a plain equality
+   test: the result is 1 when the two are equal and 0 otherwise.  */
+
+static int
+is_equiv_class_member (equiv_class, c)
+     unsigned int equiv_class;
+     unsigned int c;
+{
+  if (c != equiv_class)
+    return 0;
+  return 1;
+}
+
+/* Return non-zero if the character C is a member of the
+   character class CHAR_CLASS, and zero otherwise.  The answer is
+   whatever the matching <ctype.h> predicate (or the isblank/isgraph
+   fallback macro) yields for C.  CC_NO_CLASS, or any value that is
+   not one of the twelve real classes, is a caller bug and aborts.  */
+
+static int
+is_char_class_member (char_class, c)
+     enum Char_class char_class;
+     unsigned int c;
+{
+  switch (char_class)
+    {
+    case CC_ALNUM:
+      return isalnum (c);
+    case CC_ALPHA:
+      return isalpha (c);
+    case CC_BLANK:
+      return isblank (c);
+    case CC_CNTRL:
+      return iscntrl (c);
+    case CC_DIGIT:
+      return isdigit (c);
+    case CC_GRAPH:
+      return isgraph (c);
+    case CC_LOWER:
+      return islower (c);
+    case CC_PRINT:
+      return isprint (c);
+    case CC_PUNCT:
+      return ispunct (c);
+    case CC_SPACE:
+      return isspace (c);
+    case CC_UPPER:
+      return isupper (c);
+    case CC_XDIGIT:
+      return isxdigit (c);
+    case CC_NO_CLASS:
+    default:
+      break;
+    }
+
+  /* Not a usable class.  Abort explicitly rather than run off the end
+     of a function whose value the caller is going to use.  */
+  abort ();
+  return 0;
+}
+
+/* Perform the first pass over each range-spec argument S,
+ converting all \c and \ddd escapes to their one-byte representations.
+ The conversion is done in-place, so S must point to writable
+ storage. If an illegal quote sequence is found, an error message is
+ printed and the function returns non-zero. Otherwise the length of
+ the resulting string is returned through LEN and the function returns 0.
+ The resulting array of characters may contain zero-bytes; however,
+ on input, S is assumed to be null-terminated, and hence
+ cannot contain actual (non-escaped) zero bytes.
+ No terminating zero byte is stored after the converted characters,
+ and *LEN is left unmodified when an error is reported. */
+
+static int
+unquote (s, len)
+ unsigned char *s;
+ int *len;
+{
+ int i, j;
+
+ /* I is the read position and J the write position. Every pass through
+ the loop advances I at least once and J exactly once, so J never
+ overtakes I and writing in place is safe. */
+ j = 0;
+ for (i = 0; s[i]; i++)
+ {
+ switch (s[i])
+ {
+ int c;
+ case '\\':
+ /* Here s[i] is the backslash and s[i + 1] selects the escape. */
+ switch (s[i + 1])
+ {
+ int oct_digit;
+ case '\\':
+ c = '\\';
+ break;
+ case 'a':
+ c = '\007';
+ break;
+ case 'b':
+ c = '\b';
+ break;
+ case 'f':
+ c = '\f';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ case 'r':
+ c = '\r';
+ break;
+ case 't':
+ c = '\t';
+ break;
+ case 'v':
+ c = '\v';
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ /* One to three octal digits. I is bumped once for each digit
+ after the first that is consumed; the `++i' following this
+ inner switch accounts for the first digit. */
+ c = s[i + 1] - '0';
+ oct_digit = s[i + 2] - '0';
+ if (0 <= oct_digit && oct_digit <= 7)
+ {
+ c = 8 * c + oct_digit;
+ ++i;
+ oct_digit = s[i + 2] - '0';
+ if (0 <= oct_digit && oct_digit <= 7)
+ {
+ /* Take the third digit only if the value still fits
+ in one byte. */
+ if (8 * c + oct_digit < N_CHARS)
+ {
+ c = 8 * c + oct_digit;
+ ++i;
+ }
+ else if (!posix_pedantic)
+ {
+ /* Any octal number larger than 0377 won't
+ fit in 8 bits. So we stop when adding the
+ next digit would put us over the limit and
+ give a warning about the ambiguity. POSIX
+ isn't clear on this, but one person has said
+ that in his interpretation, POSIX says tr
+ can't even give a warning. */
+ /* At this point s[i], s[i + 1] and s[i + 2] are the
+ three digits; the third one is left in the input to
+ be processed as an ordinary character. */
+ error (0, 0, "warning: the ambiguous octal escape \
+\\%c%c%c is being\n\tinterpreted as the 2-byte sequence \\0%c%c, `%c'",
+ s[i], s[i + 1], s[i + 2],
+ s[i], s[i + 1], s[i + 2]);
+ }
+ }
+ }
+ break;
+ case '\0':
+ error (0, 0, "invalid backslash escape at end of string");
+ return 1;
+ break;
+ default:
+ error (0, 0, "invalid backslash escape `\\%c'", s[i + 1]);
+ return 1;
+ break;
+ }
+ /* Move I onto the last character of the escape; the loop increment
+ then steps past it. */
+ ++i;
+ s[j++] = c;
+ break;
+ default:
+ /* An ordinary character is copied through unchanged. */
+ s[j++] = s[i];
+ break;
+ }
+ }
+ *len = j;
+ return 0;
+}
+
+/* Search the global char_class_name array for an entry equal to the
+   null-terminated string CLASS_STR.  Return the index of the match,
+   as an enum Char_class, or CC_NO_CLASS when no entry matches.  */
+
+static enum Char_class
+look_up_char_class (class_str)
+     unsigned char *class_str;
+{
+  unsigned int idx = 0;
+
+  while (idx < N_CHAR_CLASSES)
+    {
+      if (strcmp (class_str, char_class_name[idx]) == 0)
+        return (enum Char_class) idx;
+      ++idx;
+    }
+  return CC_NO_CLASS;
+}
+
+/* Build a printable rendering of the character C in freshly
+   allocated storage that the caller must free.  A character for
+   which isprint is true is copied as is; any other is written as a
+   backslash followed by three octal digits.  C must be less than
+   N_CHARS.  Used only when formatting error messages.  */
+
+static char *
+make_printable_char (c)
+     unsigned int c;
+{
+  /* Room for a backslash, three octal digits and the terminator.  */
+  char *text = xmalloc (5);
+
+  assert (c < N_CHARS);
+  if (!isprint (c))
+    sprintf (text, "\\%03o", c);
+  else
+    {
+      text[0] = c;
+      text[1] = '\0';
+    }
+  return text;
+}
+
+/* Return a newly allocated copy of S which is suitable for printing;
+   the caller must free it.  LEN is the number of characters in S,
+   which may include zero bytes.  Most non-printing (isprint)
+   characters are represented by a backslash followed by 3 octal
+   digits.  However, the characters represented by \c escapes where c
+   is one of [abfnrtv] are represented by their 2-character \c
+   sequences, and a backslash is copied as a single backslash.  When
+   LEN is 0 the result is an empty string.  This function is used
+   solely for printing error messages.  */
+
+static char *
+make_printable_str (s, len)
+     unsigned char *s;
+     int len;
+{
+  /* Worst case is that every character expands to a backslash
+     followed by a 3-character octal escape sequence.  */
+  char *printable_buf = xmalloc (4 * len + 1);
+  char *p = printable_buf;
+  int i;
+
+  /* Terminate up front: with LEN == 0 the loop below never runs and
+     stpcpy never gets the chance to store a terminator.  */
+  *p = '\0';
+
+  for (i = 0; i < len; i++)
+    {
+      char buf[5];
+      char *tmp = NULL;
+
+      switch (s[i])
+        {
+        case '\\':
+          tmp = "\\";
+          break;
+        case '\007':
+          tmp = "\\a";
+          break;
+        case '\b':
+          tmp = "\\b";
+          break;
+        case '\f':
+          tmp = "\\f";
+          break;
+        case '\n':
+          tmp = "\\n";
+          break;
+        case '\r':
+          tmp = "\\r";
+          break;
+        case '\t':
+          tmp = "\\t";
+          break;
+        case '\v':
+          tmp = "\\v";
+          break;
+        default:
+          if (isprint (s[i]))
+            {
+              buf[0] = s[i];
+              buf[1] = '\0';
+            }
+          else
+            sprintf (buf, "\\%03o", s[i]);
+          tmp = buf;
+          break;
+        }
+      /* stpcpy returns the address of the terminator it stored, which
+         is where the next piece has to go.  */
+      p = stpcpy (p, tmp);
+    }
+  return printable_buf;
+}
+
+/* Allocate an RE_NORMAL_CHAR element holding the character C and link
+   it after the current last element of LIST, which must already have
+   a tail (as set up by spec_init).  */
+
+static void
+append_normal_char (list, c)
+     struct Spec_list *list;
+     unsigned int c;
+{
+  struct List_element *node
+    = (struct List_element *) xmalloc (sizeof (struct List_element));
+
+  node->type = RE_NORMAL_CHAR;
+  node->u.normal_char = c;
+  node->next = NULL;
+
+  assert (list->tail);
+  list->tail->next = node;
+  list->tail = node;
+}
+
+/* Append to LIST a newly allocated RE_RANGE element covering the
+   characters FIRST through LAST and return zero.  If LAST precedes
+   FIRST, append nothing, print an error message and return non-zero.
+   Equal endpoints are accepted, so `c-c' is a valid range.  */
+
+static int
+append_range (list, first, last)
+     struct Spec_list *list;
+     unsigned int first;
+     unsigned int last;
+{
+  if (ORD (first) <= ORD (last))
+    {
+      struct List_element *node
+        = (struct List_element *) xmalloc (sizeof (struct List_element));
+
+      node->type = RE_RANGE;
+      node->u.range.first_char = first;
+      node->u.range.last_char = last;
+      node->next = NULL;
+
+      assert (list->tail);
+      list->tail->next = node;
+      list->tail = node;
+      return 0;
+    }
+
+  /* The endpoints are reversed: complain, showing both in printable
+     form, and leave LIST untouched.  */
+  {
+    char *first_text = make_printable_char (first);
+    char *last_text = make_printable_char (last);
+
+    error (0, 0,
+           "range-endpoints of `%s-%s' are in reverse collating sequence order",
+           first_text, last_text);
+    free (first_text);
+    free (last_text);
+  }
+  return 1;
+}
+
+/* Look up the null-terminated string CHAR_CLASS_STR among the valid
+   character class names.  If it is one of them, append a newly
+   allocated RE_CHAR_CLASS element for that class to LIST and return
+   0.  Otherwise print an error message and return non-zero.  LEN,
+   the length of CHAR_CLASS_STR, is used only to format that message.  */
+
+static int
+append_char_class (list, char_class_str, len)
+     struct Spec_list *list;
+     unsigned char *char_class_str;
+     int len;
+{
+  enum Char_class which = look_up_char_class (char_class_str);
+
+  if (which != CC_NO_CLASS)
+    {
+      struct List_element *node
+        = (struct List_element *) xmalloc (sizeof (struct List_element));
+
+      node->type = RE_CHAR_CLASS;
+      node->u.char_class = which;
+      node->next = NULL;
+
+      assert (list->tail);
+      list->tail->next = node;
+      list->tail = node;
+      return 0;
+    }
+
+  {
+    char *shown = make_printable_str (char_class_str, len);
+
+    error (0, 0, "invalid character class `%s'", shown);
+    free (shown);
+  }
+  return 1;
+}
+
+/* Append to LIST a newly allocated RE_REPEATED_CHAR element for a
+   [c*n] construct.  THE_CHAR is the character to repeat and
+   REPEAT_COUNT, which must not be negative, is stored as its repeat
+   count.  */
+
+static void
+append_repeated_char (list, the_char, repeat_count)
+     struct Spec_list *list;
+     unsigned int the_char;
+     long int repeat_count;
+{
+  struct List_element *node
+    = (struct List_element *) xmalloc (sizeof (struct List_element));
+
+  node->type = RE_REPEATED_CHAR;
+  node->u.repeated_char.repeat_count = repeat_count;
+  node->u.repeated_char.the_repeated_char = the_char;
+  node->next = NULL;
+
+  assert (list->tail);
+  list->tail->next = node;
+  list->tail = node;
+}
+
+/* EQUIV_CLASS_STR is the text found inside a [=str=] construct and
+   LEN is its length.  When LEN is exactly 1, append to LIST a newly
+   allocated RE_EQUIV_CLASS element for that one character and return
+   zero.  For any other LEN, print an error message and return
+   non-zero without touching LIST.  */
+
+static int
+append_equiv_class (list, equiv_class_str, len)
+     struct Spec_list *list;
+     unsigned char *equiv_class_str;
+     int len;
+{
+  if (len == 1)
+    {
+      struct List_element *node
+        = (struct List_element *) xmalloc (sizeof (struct List_element));
+
+      node->type = RE_EQUIV_CLASS;
+      node->u.equiv_code = *equiv_class_str;
+      node->next = NULL;
+
+      assert (list->tail);
+      list->tail->next = node;
+      list->tail = node;
+      return 0;
+    }
+
+  {
+    char *shown = make_printable_str (equiv_class_str, len);
+
+    error (0, 0, "%s: equivalence class operand must be a single character",
+           shown);
+    free (shown);
+  }
+  return 1;
+}
+
+/* Return a newly allocated copy of P[FIRST_IDX..LAST_IDX], followed
+   by a terminating zero byte.  FIRST_IDX must not exceed LAST_IDX.
+   The copied bytes may themselves include zero bytes.  The caller
+   must free the result.  */
+
+static unsigned char *
+substr (p, first_idx, last_idx)
+     unsigned char *p;
+     int first_idx;
+     int last_idx;
+{
+  int len;
+  unsigned char *tmp;
+
+  /* Check the arguments before they are used to size the allocation.  */
+  assert (first_idx <= last_idx);
+  len = last_idx - first_idx + 1;
+
+  /* One byte more than LEN, for the terminator stored below.  */
+  tmp = (unsigned char *) xmalloc (len + 1);
+
+  /* We must use bcopy or memcpy rather than strncpy
+     because `p' may contain zero-bytes.  */
+  bcopy (p + first_idx, tmp, len);
+  tmp[len] = '\0';
+  return tmp;
+}
+
+/* Scan the string P, whose length is P_LEN and which may contain
+   zero bytes, from index START_IDX onward for the character
+   PRE_BRACKET_CHAR immediately followed by `]'.  Return the index of
+   PRE_BRACKET_CHAR in the first such pair, or -1 if there is none.  */
+
+static int
+find_closing_delim (p, start_idx, p_len, pre_bracket_char)
+     unsigned char *p;
+     int start_idx;
+     int p_len;
+     unsigned int pre_bracket_char;
+{
+  int idx = start_idx;
+
+  /* Stop one short of the end so that p[idx + 1] is always valid.  */
+  while (idx < p_len - 1)
+    {
+      if (p[idx + 1] == ']' && p[idx] == pre_bracket_char)
+        return idx;
+      idx++;
+    }
+  return -1;
+}
+
+/* Convert the LEN characters starting at S, which need not be
+   null-terminated, to a long integer.  A leading `0' means the
+   digits are octal; otherwise they are decimal.  On success store
+   the value in *VAL and return zero.  Return non-zero, leaving *VAL
+   alone, if LEN is not positive, if any of the LEN characters is not
+   a digit of the selected base (so a sign is rejected too), or if
+   the value would exceed LONG_MAX.  */
+
+static int
+non_neg_strtol (s, len, val)
+     unsigned char *s;
+     int len;
+     long int *val;
+{
+  unsigned int radix;
+  long total = 0;
+  int pos = 0;
+
+  if (len <= 0)
+    return 1;
+  if (!isdigit (s[0]))
+    return 1;
+  radix = (s[0] == '0') ? 8 : 10;
+
+  while (pos < len)
+    {
+      int digit = s[pos] - '0';
+
+      if (digit < 0 || digit >= radix)
+        return 1;
+      /* Up to nine digits cannot overflow a long, so the range check
+         is made only from the tenth digit on.  */
+      if (pos > 8 && total > (LONG_MAX - digit) / radix)
+        return 1;
+      total = total * radix + digit;
+      pos++;
+    }
+
+  *val = total;
+  return 0;
+}
+
+/* Parse the tail of a bracketed repeat construct [c*n] in the string
+   P of length P_LEN.  START_IDX is the index of `c', the character
+   right after the opening bracket, and START_IDX + 1 must be less
+   than P_LEN.  If P[START_IDX + 1] is not `*', or no `]' follows it,
+   return -1.  Otherwise store `c' through CHAR_TO_REPEAT and look at
+   the text between the `*' and the first `]':
+     - if it is empty, store 0 through N and return the index of `]';
+     - if it is a valid octal or decimal number, store its value
+       through N and return the index of `]';
+     - if it is anything else, print an error message and return -2.  */
+
+static int
+find_bracketed_repeat (p, start_idx, p_len, char_to_repeat, n)
+     unsigned char *p;
+     int start_idx;
+     int p_len;
+     unsigned int *char_to_repeat;
+     long int *n;
+{
+  int close_idx;
+  int count_len;
+  unsigned char *count_str;
+
+  assert (start_idx + 1 < p_len);
+  if (p[start_idx + 1] != '*')
+    return -1;
+
+  /* Find the first `]' after the `*'.  */
+  close_idx = start_idx + 2;
+  while (close_idx < p_len && p[close_idx] != ']')
+    close_idx++;
+  if (close_idx >= p_len)
+    return -1;                  /* No bracket found.  */
+
+  *char_to_repeat = p[start_idx];
+  count_len = close_idx - start_idx - 2;
+  if (count_len == 0)
+    {
+      /* We've matched [c*] -- no explicit repeat count.  */
+      *n = 0;
+      return close_idx;
+    }
+
+  /* We have [c*s], where s should be a string of octal or decimal
+     digits.  */
+  count_str = &p[start_idx + 2];
+  if (non_neg_strtol (count_str, count_len, n))
+    {
+      char *shown = make_printable_str (count_str, count_len);
+
+      error (0, 0, "invalid repeat count `%s' in [c*n] construct", shown);
+      free (shown);
+      return -2;
+    }
+  return close_idx;
+}
+
+/* Convert string UNESCAPED_STRING (which has been preprocessed to
+ convert backslash-escape sequences) of length LEN characters into
+ a linked list of the following 5 types of constructs:
+ - [:str:] Character class where `str' is one of the 12 valid strings.
+ - [=c=] Equivalence class where `c' is any single character.
+ - [c*n] Repeat the single character `c' `n' times. n may be omitted.
+ However, if `n' is present, it must be a non-negative octal or
+ decimal integer.
+ - r-s Range of characters from `r' to `s'. The second endpoint must
+ not precede the first in the current collating sequence.
+ - c Any other character is interpreted as itself.
+ The elements are appended to RESULT, which must already have been
+ set up so that RESULT->tail is non-null (the append_* functions
+ assert this). Return 0 on success. Return 1 as soon as one of the
+ constructs is found to be invalid; an error message has been printed
+ by then, and the elements appended so far remain in RESULT. */
+
+static int
+build_spec_list (unescaped_string, len, result)
+ unsigned char *unescaped_string;
+ int len;
+ struct Spec_list *result;
+{
+ unsigned char *p;
+ int i;
+
+ p = unescaped_string;
+
+ /* The main for-loop below recognizes the 4 multi-character constructs.
+ A character that matches (in its context) none of the multi-character
+ constructs is classified as `normal'. Since all multi-character
+ constructs have at least 3 characters, any strings of length 2 or
+ less are composed solely of normal characters. Hence, the index of
+ the outer for-loop runs only as far as LEN-2. */
+
+ /* I is advanced inside the loop body, by the length of whatever
+ construct was recognized at p[i]. */
+ for (i = 0; i < len - 2;)
+ {
+ switch (p[i])
+ {
+ int fall_through;
+ case '[':
+ fall_through = 0;
+ switch (p[i + 1])
+ {
+ int closing_delim_idx;
+ int closing_bracket_idx;
+ unsigned int char_to_repeat;
+ long repeat_count;
+ case ':':
+ case '=':
+ /* Look for the matching `:]' or `=]'. */
+ closing_delim_idx = find_closing_delim (p, i + 2, len, p[i + 1]);
+ if (closing_delim_idx >= 0)
+ {
+ int parse_failed;
+ /* NOTE(review): for an empty operand such as `[::]' or `[==]',
+ closing_delim_idx equals i + 2, so substr is called with
+ last_idx < first_idx and its assertion fails. */
+ unsigned char *opnd_str = substr (p, i + 2, closing_delim_idx - 1);
+ if (p[i + 1] == ':')
+ parse_failed = append_char_class (result, opnd_str,
+ (closing_delim_idx - 1) - (i + 2) + 1);
+ else
+ parse_failed = append_equiv_class (result, opnd_str,
+ (closing_delim_idx - 1) - (i + 2) + 1);
+ free (opnd_str);
+
+ /* Return non-zero if append_*_class reports a problem. */
+ if (parse_failed)
+ return 1;
+ else
+ /* Continue just past the closing `]'. */
+ i = closing_delim_idx + 2;
+ break;
+ }
+ /* Else fall through. This could be [:*] or [=*]. */
+ default:
+ /* Determine whether this is a bracketed repeat range
+ matching the RE \[.\*(dec_or_oct_number)?\]. */
+ closing_bracket_idx = find_bracketed_repeat (p, i + 1,
+ len, &char_to_repeat, &repeat_count);
+ if (closing_bracket_idx >= 0)
+ {
+ append_repeated_char (result, char_to_repeat, repeat_count);
+ i = closing_bracket_idx + 1;
+ break;
+ }
+ else if (closing_bracket_idx == -1)
+ {
+ /* Not a repeat construct at all: treat the `[' like any
+ other character in the outer default case below. */
+ fall_through = 1;
+ }
+ else
+ /* Found a string that looked like [c*n] but the
+ numeric part was invalid. */
+ return 1;
+ break;
+ }
+ if (!fall_through)
+ break;
+
+ /* Here if we've tried to match [c*n], [:str:], and [=c=]
+ and none of them fit. So we still have to consider the
+ range `[-c' (from `[' to `c'). */
+ default:
+ /* Look ahead one char for ranges like a-z. */
+ /* p[i + 2] is within the string because i < len - 2 here. */
+ if (p[i + 1] == '-')
+ {
+ if (append_range (result, p[i], p[i + 2]))
+ return 1;
+ i += 3;
+ }
+ else
+ {
+ append_normal_char (result, p[i]);
+ ++i;
+ }
+ break;
+ }
+ }
+
+ /* Now handle the (2 or fewer) remaining characters p[i]..p[len - 1]. */
+ for (; i < len; i++)
+ append_normal_char (result, p[i]);
+
+ return 0;
+}
+
+
+/* Given a Spec_list S (with its saved state implicit in the values
+ of its members `tail' and `state'), return the next single character
+ in the expansion of S's constructs. If the last character of S was
+ returned on the previous call or if S was empty, this function
+ returns -1. For example, successive calls to get_next where S
+ represents the spec-string 'a-d[y*3]' will return the sequence
+ of values a, b, c, d, y, y, y, -1. Finally, if the construct from
+ which the returned character comes is [:upper:] or [:lower:], the
+ parameter CLASS is given a value to indicate which it was. Otherwise
+ CLASS is set to UL_NONE. This value is used only when constructing
+ the translation table to verify that any occurrences of upper and
+ lower class constructs in the spec-strings appear in the same relative
+ positions. CLASS may be NULL, in which case nothing is stored.
+ To start a traversal, the caller sets S->state to BEGIN_STATE. */
+
+static int
+get_next (s, class)
+ struct Spec_list *s;
+ enum Upper_Lower_class *class;
+{
+ struct List_element *p;
+ int return_val;
+ int i;
+
+ if (class)
+ *class = UL_NONE;
+
+ /* First call of a traversal: start at the element after the dummy head. */
+ if (s->state == BEGIN_STATE)
+ {
+ s->tail = s->head->next;
+ s->state = NEW_ELEMENT;
+ }
+
+ p = s->tail;
+ if (p == NULL)
+ return -1;
+
+ switch (p->type)
+ {
+ case RE_NORMAL_CHAR:
+ /* A single character: return it and move on to the next element. */
+ return_val = p->u.normal_char;
+ s->state = NEW_ELEMENT;
+ s->tail = p->next;
+ break;
+
+ case RE_RANGE:
+ /* While inside a range, `state' holds the character to return. */
+ if (s->state == NEW_ELEMENT)
+ s->state = ORD (p->u.range.first_char);
+ else
+ ++(s->state);
+ return_val = CHR (s->state);
+ if (s->state == ORD (p->u.range.last_char))
+ {
+ s->tail = p->next;
+ s->state = NEW_ELEMENT;
+ }
+ break;
+
+ case RE_CHAR_CLASS:
+ /* While inside a class, `state' holds the member to return on this
+ call; it is found one call ahead. */
+ if (s->state == NEW_ELEMENT)
+ {
+ /* Find the first member of the class. The assertion below fails
+ if the class has no members at all. */
+ for (i = 0; i < N_CHARS; i++)
+ if (is_char_class_member (p->u.char_class, i))
+ break;
+ assert (i < N_CHARS);
+ s->state = i;
+ }
+ assert (is_char_class_member (p->u.char_class, s->state));
+ return_val = CHR (s->state);
+ /* Look for the following member, to be returned by the next call. */
+ for (i = s->state + 1; i < N_CHARS; i++)
+ if (is_char_class_member (p->u.char_class, i))
+ break;
+ if (i < N_CHARS)
+ s->state = i;
+ else
+ {
+ s->tail = p->next;
+ s->state = NEW_ELEMENT;
+ }
+ if (class)
+ {
+ switch (p->u.char_class)
+ {
+ case CC_LOWER:
+ *class = UL_LOWER;
+ break;
+ case CC_UPPER:
+ *class = UL_UPPER;
+ break;
+ default:
+ /* empty */
+ break;
+ }
+ }
+ break;
+
+ case RE_EQUIV_CLASS:
+ /* FIXME: this assumes that each character is alone in its own
+ equivalence class (which appears to be correct for my
+ LC_COLLATE. But I don't know of any function that allows
+ one to determine a character's equivalence class. */
+
+ return_val = p->u.equiv_code;
+ s->state = NEW_ELEMENT;
+ s->tail = p->next;
+ break;
+
+ case RE_REPEATED_CHAR:
+ /* Here, a repeat count of n == 0 means don't repeat at all. */
+ assert (p->u.repeated_char.repeat_count >= 0);
+ if (p->u.repeated_char.repeat_count == 0)
+ {
+ /* Skip this element and return whatever the rest of the list
+ yields (possibly -1). */
+ s->tail = p->next;
+ s->state = NEW_ELEMENT;
+ return_val = get_next (s, class);
+ }
+ else
+ {
+ /* While inside a repeat, `state' counts the copies returned so
+ far, including the one returned by this call. */
+ if (s->state == NEW_ELEMENT)
+ {
+ s->state = 0;
+ }
+ ++(s->state);
+ return_val = p->u.repeated_char.the_repeated_char;
+ if (p->u.repeated_char.repeat_count > 0
+ && s->state == p->u.repeated_char.repeat_count)
+ {
+ s->tail = p->next;
+ s->state = NEW_ELEMENT;
+ }
+ }
+ break;
+
+ case RE_NO_TYPE:
+ /* No append_* function creates an element of this type. */
+ abort ();
+ break;
+ }
+ return return_val;
+}
+
+/* This is a minor kludge.  This function is called from
+   get_spec_stats to determine the cardinality of a set derived
+   from a complemented string.  It's a kludge in that some of
+   the same operations are (duplicated) performed in set_initialize.
+
+   Walk the whole expansion of S with get_next and return the number
+   of character codes, out of N_CHARS, that never occur in it.  As a
+   side effect the traversal state of S (`state' and `tail') is left
+   at the end of the list.  */
+
+static int
+card_of_complement (s)
+     struct Spec_list *s;
+{
+  int c;
+  int cardinality = N_CHARS;
+  SET_TYPE in_set[N_CHARS];
+
+  bzero (in_set, N_CHARS * sizeof (in_set[0]));
+  s->state = BEGIN_STATE;
+  while ((c = get_next (s, NULL)) != -1)
+    {
+      /* Record membership as a flag, not as a count.  SET_TYPE is a
+         char, so a counter would wrap back to zero for a character
+         that occurs 256 times (for instance through a large [c*n]),
+         and that character would then be subtracted a second time.  */
+      if (!in_set[c])
+        {
+          in_set[c] = 1;
+          --cardinality;
+        }
+    }
+  return cardinality;
+}
+
+/* Gather statistics about the spec-list S in preparation for the tests
+ in validate that determine the legality of the specs. This function
+ is called at most twice; once for string1, and again for any string2.
+ LEN_S1 < 0 indicates that this is the first call and that S represents
+ string1. When LEN_S1 >= 0, it is the length of the expansion of the
+ constructs in string1, and we can use its value to resolve any
+ indefinite repeat construct in S (which represents string2). Hence,
+ this function has the side-effect that it converts a valid [c*]
+ construct in string2 to [c*n] where n is large enough (or 0) to give
+ string2 the same length as string1. For example, with the command
+ tr a-z 'A[\n*]Z' on the second call to get_spec_stats, LEN_S1 would
+ be 26 and S (representing string2) would be converted to 'A[\n*24]Z'.
+ The results are stored in S->length, S->n_indefinite_repeats,
+ S->has_equiv_class, S->has_upper_or_lower and
+ S->has_restricted_char_class. */
+
+static void
+get_spec_stats (s, len_s1)
+ struct Spec_list *s;
+ int len_s1;
+{
+ struct List_element *p;
+ /* The most recently seen element with a repeat count of 0, if any. */
+ struct List_element *indefinite_repeat_element = NULL;
+ /* Running total of the number of characters the constructs expand to. */
+ int len = 0;
+
+ s->n_indefinite_repeats = 0;
+ s->has_equiv_class = 0;
+ s->has_restricted_char_class = 0;
+ s->has_upper_or_lower = 0;
+ /* Start after the dummy head element. */
+ for (p = s->head->next; p; p = p->next)
+ {
+ switch (p->type)
+ {
+ int i;
+ case RE_NORMAL_CHAR:
+ ++len;
+ break;
+
+ case RE_RANGE:
+ assert (p->u.range.last_char >= p->u.range.first_char);
+ len += p->u.range.last_char - p->u.range.first_char + 1;
+ break;
+
+ case RE_CHAR_CLASS:
+ /* A class contributes one character per member. */
+ for (i = 0; i < N_CHARS; i++)
+ if (is_char_class_member (p->u.char_class, i))
+ ++len;
+ switch (p->u.char_class)
+ {
+ case CC_UPPER:
+ case CC_LOWER:
+ s->has_upper_or_lower = 1;
+ break;
+ default:
+ s->has_restricted_char_class = 1;
+ break;
+ }
+ break;
+
+ case RE_EQUIV_CLASS:
+ for (i = 0; i < N_CHARS; i++)
+ if (is_equiv_class_member (p->u.equiv_code, i))
+ ++len;
+ s->has_equiv_class = 1;
+ break;
+
+ case RE_REPEATED_CHAR:
+ /* A count of 0 marks an indefinite repeat; it adds nothing to
+ LEN here and may be resolved after the loop. */
+ if (p->u.repeated_char.repeat_count > 0)
+ len += p->u.repeated_char.repeat_count;
+ else if (p->u.repeated_char.repeat_count == 0)
+ {
+ indefinite_repeat_element = p;
+ ++(s->n_indefinite_repeats);
+ }
+ break;
+
+ case RE_NO_TYPE:
+ assert (0);
+ break;
+ }
+ }
+
+ /* With exactly one indefinite repeat and a known string1 length that is
+ not shorter than what has been counted, let the repeat take up the
+ difference. On the first call LEN_S1 is negative, so this is skipped. */
+ if (len_s1 >= len && s->n_indefinite_repeats == 1)
+ {
+ indefinite_repeat_element->u.repeated_char.repeat_count = len_s1 - len;
+ len = len_s1;
+ }
+ /* For a complemented string1, the length that matters is the number of
+ characters that do not occur in S. */
+ if (complement && len_s1 < 0)
+ s->length = card_of_complement (s);
+ else
+ s->length = len;
+ return;
+}
+
+/* Make SPEC_LIST an empty list: allocate one list element, clear its
+   `next' link, and make both `head' and `tail' point at it.  */
+
+static void
+spec_init (spec_list)
+     struct Spec_list *spec_list;
+{
+  struct List_element *first;
+
+  first = (struct List_element *) xmalloc (sizeof (struct List_element));
+  spec_list->tail = first;
+  spec_list->head = first;
+  spec_list->head->next = NULL;
+}
+
+/* Parse the argument string S in two passes.  First `unquote'
+   processes S and reports its resulting length; then
+   `build_spec_list' turns those bytes into the linked list SPEC_LIST
+   of characters and constructs.  Return 1 if either pass reports an
+   error (the second pass is not run when the first one fails), and 0
+   otherwise.  */
+
+static int
+parse_str (s, spec_list)
+     unsigned char *s;
+     struct Spec_list *spec_list;
+{
+  int len;
+
+  /* `||' short-circuits and yields exactly 0 or 1.  */
+  return (unquote (s, &len) || build_spec_list (s, len, spec_list));
+}
+
+/* Given two specification lists, S1 and S2, and assuming that
+   S1->length > S2->length, append a single [c*n] element to S2 where c
+   is the last character in the expansion of S2 and n is the difference
+   between the two lengths.
+   Upon successful completion, S2->length is set to S1->length.  The
+   only way this function can fail to make S2 as long as S1 is when S2
+   has zero length, since in that case there is no last character to
+   repeat.  So S2->length is required to be at least 1.
+
+   Providing this functionality allows the user to do some pretty
+   non-BSD (and non-portable) things:  For example, the command
+       tr -cs '[:upper:]0-9' '[:lower:]'
+   is almost guaranteed to give results that depend on your collating
+   sequence.  */
+
+static void
+string2_extend (s1, s2)
+     struct Spec_list *s1;
+     struct Spec_list *s2;
+{
+  struct List_element *p;
+  int char_to_repeat = 0;
+  int i;
+
+  assert (translating);
+  assert (s1->length > s2->length);
+  assert (s2->length > 0);
+
+  /* The character to repeat is the last one produced by the final
+     construct of S2.  */
+  p = s2->tail;
+  switch (p->type)
+    {
+    case RE_NORMAL_CHAR:
+      char_to_repeat = p->u.normal_char;
+      break;
+
+    case RE_RANGE:
+      char_to_repeat = p->u.range.last_char;
+      break;
+
+    case RE_CHAR_CLASS:
+      /* Find the highest-valued member of the class.  Valid character
+         codes are 0..N_CHARS-1, so the scan must start at N_CHARS - 1;
+         starting at N_CHARS would test a value that is not a
+         character.  */
+      for (i = N_CHARS - 1; i >= 0; i--)
+        if (is_char_class_member (p->u.char_class, i))
+          break;
+      assert (i >= 0);
+      char_to_repeat = CHR (i);
+      break;
+
+    case RE_REPEATED_CHAR:
+      char_to_repeat = p->u.repeated_char.the_repeated_char;
+      break;
+
+    case RE_EQUIV_CLASS:
+      /* This shouldn't happen, because validate exits with an error
+         if it finds an equiv class in string2 when translating.  */
+      abort ();
+      break;
+
+    case RE_NO_TYPE:
+      abort ();
+      break;
+    }
+
+  append_repeated_char (s2, char_to_repeat, s1->length - s2->length);
+  s2->length = s1->length;
+}
+
+/* Die with an error message if S1 and S2 describe strings that
+   are not valid with the given command line switches.  S2 is NULL
+   when no string2 was given.
+   A side effect of this function is that if a legal [c*] or
+   [c*0] construct appears in string2, it is converted to [c*n]
+   with a value for n that makes s2->length == s1->length.  By
+   the same token, if the --truncate-set1 option is not
+   given, S2 may be extended.
+
+   The restrictions on character classes apply only when translating;
+   they are not enforced when string1/string2 merely name a delete or
+   squeeze set.  */
+
+static void
+validate (s1, s2)
+     struct Spec_list *s1;
+     struct Spec_list *s2;
+{
+  get_spec_stats (s1, -1);
+  if (s1->n_indefinite_repeats > 0)
+    {
+      error (1, 0, "the [c*] repeat construct may not appear in string1");
+    }
+
+  /* FIXME: it isn't clear from the POSIX spec that this is illegal,
+     but in the spirit of the other restrictions put on translation
+     with character classes, this seems a logical interpretation.
+     The restriction is about translation, so it must not reject
+     commands such as `tr -cd "[:upper:]"' that only delete or
+     squeeze.  */
+  if (translating && complement && s1->has_upper_or_lower)
+    {
+      error (1, 0,
+             "character classes may not be used when translating and complementing");
+    }
+
+  if (s2)
+    {
+      get_spec_stats (s2, s1->length);
+
+      /* Only a translation needs string2 to have a well-defined
+         order; as a squeeze set (e.g. with -ds) any class is fine.  */
+      if (translating && s2->has_restricted_char_class)
+        {
+          error (1, 0,
+                 "when translating, the only character classes that may appear in\n\
+\tstring2 are `upper' and `lower'");
+        }
+
+      if (s2->n_indefinite_repeats > 1)
+        {
+          error (1, 0, "only one [c*] repeat construct may appear in string2");
+        }
+
+      if (translating)
+        {
+          if (s2->has_equiv_class)
+            {
+              error (1, 0,
+                     "[=c=] expressions may not appear in string2 when translating");
+            }
+
+          if (s1->length > s2->length)
+            {
+              if (!truncate_set1)
+                {
+                  /* string2 must be non-empty unless --truncate-set1 is
+                     given or string1 is empty.  */
+
+                  if (s2->length == 0)
+                    error (1, 0,
+                           "when not truncating set1, string2 must be non-empty");
+                  string2_extend (s1, s2);
+                }
+            }
+
+          if (complement && s2->has_upper_or_lower)
+            error (1, 0,
+                   "character classes may not be used when translating and complementing");
+        }
+      else
+        /* Not translating.  */
+        {
+          if (s2->n_indefinite_repeats > 0)
+            error (1, 0,
+                   "the [c*] construct may appear in string2 only when translating");
+        }
+    }
+}
+
+/* Read buffers of SIZE bytes via the function READER (if READER is
+ NULL, read from stdin) until EOF. When non-NULL, READER is either
+ read_and_delete or read_and_xlate. After each buffer is read, it is
+ processed and written to stdout. The buffers are processed so that
+ multiple consecutive occurrences of the same character in the input
+ stream are replaced by a single occurrence of that character if the
+ character is in the squeeze set. */
+
+static void
+squeeze_filter (buf, size, reader)
+ unsigned char *buf;
+ long int size;
+ PFI reader;
+{
+ /* The character whose repeats are currently being dropped, or
+ NOT_A_CHAR when no squeeze is in progress. This state is kept
+ across buffer refills so a run that spans two buffers is still
+ squeezed. */
+ unsigned int char_to_squeeze = NOT_A_CHAR;
+ /* Index of the next byte of buf to examine. */
+ int i = 0;
+ /* Number of valid bytes in buf from the latest read. */
+ int nr = 0;
+
+ for (;;)
+ {
+ /* Index of the first byte of buf not yet written to stdout. */
+ int begin;
+
+ /* Refill the buffer once every byte in it has been consumed. */
+ if (i >= nr)
+ {
+ if (reader == NULL)
+ nr = read (0, (char *) buf, size);
+ else
+ nr = (*reader) (buf, size, NULL);
+
+ if (nr < 0)
+ error (1, errno, "read error");
+ if (nr == 0)
+ break;
+ i = 0;
+ }
+
+ begin = i;
+
+ /* Phase 1: not inside a run. Find the next squeeze-set character
+ and write everything up to and including its first occurrence. */
+ if (char_to_squeeze == NOT_A_CHAR)
+ {
+ int out_len;
+ /* Here, by being a little tricky, we can get a significant
+ performance increase in most cases when the input is
+ reasonably large. Since tr will modify the input only
+ if two consecutive (and identical) input characters are
+ in the squeeze set, we can step by two through the data
+ when searching for a character in the squeeze set. This
+ means there may be a little more work in a few cases and
+ perhaps twice as much work in the worst cases where most
+ of the input is removed by squeezing repeats. But most
+ uses of this functionality seem to remove less than 20-30%
+ of the input. */
+ for (; i < nr && !in_squeeze_set[buf[i]]; i += 2)
+ ; /* empty */
+
+ /* There is a special case when i == nr and we've just
+ skipped a character (the last one in buf) that is in
+ the squeeze set. */
+ if (i == nr && in_squeeze_set[buf[i - 1]])
+ --i;
+
+ /* No squeeze-set character left in this buffer: write the
+ whole remainder. (i may be nr + 1 here because of the
+ step of two, hence nr rather than i.) */
+ if (i >= nr)
+ out_len = nr - begin;
+ else
+ {
+ char_to_squeeze = buf[i];
+ /* We're about to output buf[begin..i]. */
+ out_len = i - begin + 1;
+
+ /* But since we stepped by 2 in the loop above,
+ out_len may be one too large. */
+ if (i > 0 && buf[i - 1] == char_to_squeeze)
+ --out_len;
+
+ /* Advance i to the index of first character to be
+ considered when looking for a char different from
+ char_to_squeeze. */
+ ++i;
+ }
+ if (out_len > 0
+ && fwrite ((char *) &buf[begin], 1, out_len, stdout) == 0)
+ error (1, errno, "write error");
+ }
+
+ /* Phase 2: inside a run. Skip the repeats without writing them. */
+ if (char_to_squeeze != NOT_A_CHAR)
+ {
+ /* Advance i to index of first char != char_to_squeeze
+ (or to nr if all the rest of the characters in this
+ buffer are the same as char_to_squeeze). */
+ for (; i < nr && buf[i] == char_to_squeeze; i++)
+ ; /* empty */
+ if (i < nr)
+ char_to_squeeze = NOT_A_CHAR;
+ /* If (i >= nr) we've squeezed the last character in this buffer.
+ So now we have to read a new buffer and continue comparing
+ characters against char_to_squeeze. */
+ }
+ }
+}
+
+/* Keep reading buffers of up to SIZE bytes from stdin (descriptor 0)
+   until one holds at least one character outside the delete set.
+   Compact the surviving characters of that buffer to the front of BUF
+   and return how many there are.  Return 0 at EOF, and on every call
+   made after EOF has been seen.  A read failure is fatal.  NOT_USED
+   must be NULL.  */
+
+static long
+read_and_delete (buf, size, not_used)
+     unsigned char *buf;
+     long int size;
+     PFI not_used;
+{
+  static int hit_eof = 0;
+  long kept;
+
+  assert (not_used == NULL);
+  assert (size > 0);
+
+  if (hit_eof)
+    return 0;
+
+  /* Loop so that a buffer made up entirely of deleted characters is
+     not mistaken for EOF by the caller.  */
+  for (;;)
+    {
+      int src;
+      int n_read = read (0, (char *) buf, size);
+
+      if (n_read < 0)
+        error (1, errno, "read error");
+      if (n_read == 0)
+        {
+          hit_eof = 1;
+          return 0;
+        }
+
+      /* Leading characters that survive are already in place; just
+         step over them without copying.  */
+      src = 0;
+      while (src < n_read && !in_delete_set[buf[src]])
+        src++;
+      kept = src;
+
+      /* buf[src], if there is one, is a deleted character: skip it and
+         slide the remaining survivors down.  */
+      src++;
+      while (src < n_read)
+        {
+          if (!in_delete_set[buf[src]])
+            buf[kept++] = buf[src];
+          src++;
+        }
+
+      if (kept != 0)
+        return kept;
+    }
+}
+
+/* Read up to SIZE bytes from stdin (descriptor 0) into BUF and
+   replace each byte, in place, by its image in the global table
+   `xlate'.  Return the number of bytes read; return 0 at EOF and on
+   every call made after EOF has been seen.  A read failure is fatal.
+   NOT_USED must be NULL.  */
+
+static long
+read_and_xlate (buf, size, not_used)
+     unsigned char *buf;
+     long int size;
+     PFI not_used;
+{
+  static int hit_eof = 0;
+  long chars_read = 0;
+  int k;
+
+  assert (not_used == NULL);
+  assert (size > 0);
+
+  if (hit_eof)
+    return 0;
+
+  chars_read = read (0, (char *) buf, size);
+  if (chars_read < 0)
+    error (1, errno, "read error");
+
+  if (chars_read == 0)
+    {
+      hit_eof = 1;
+      return 0;
+    }
+
+  k = 0;
+  while (k < chars_read)
+    {
+      buf[k] = xlate[buf[k]];
+      k++;
+    }
+
+  return chars_read;
+}
+
+/* Fill the membership table IN_SET (N_CHARS entries) from spec-list
+   S: clear it, then mark every character that `get_next' yields when
+   S is walked from its beginning.  When COMPLEMENT_THIS_SET is
+   non-zero, invert every entry afterwards.  */
+
+static void
+set_initialize (s, complement_this_set, in_set)
+     struct Spec_list *s;
+     int complement_this_set;
+     SET_TYPE *in_set;
+{
+  int ch;
+  int k;
+
+  bzero (in_set, N_CHARS * sizeof (in_set[0]));
+
+  s->state = BEGIN_STATE;
+  for (ch = get_next (s, NULL); ch != -1; ch = get_next (s, NULL))
+    in_set[ch] = 1;
+
+  if (!complement_this_set)
+    return;
+
+  k = 0;
+  while (k < N_CHARS)
+    {
+      in_set[k] = (!in_set[k]);
+      k++;
+    }
+}
+
+/* Entry point of tr.  Parse the options (-c, -d, -s, -t and their
+   long forms), check that the number of string operands fits the
+   requested operation, parse and validate the strings, then run the
+   matching filter (squeeze, delete, delete+squeeze, or translate with
+   optional squeeze) from stdin to stdout.  Exits with status 0 on
+   success; every diagnosed error exits with status 1.  */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int non_option_args;
+  struct Spec_list buf1, buf2;
+  struct Spec_list *s1 = &buf1;
+  struct Spec_list *s2 = &buf2;
+
+  program_name = argv[0];
+
+  while ((c = getopt_long (argc, argv, "cdst", long_options,
+                           (int *) 0)) != EOF)
+    {
+      switch (c)
+        {
+        case 0:
+          break;
+
+        case 'c':
+          complement = 1;
+          break;
+
+        case 'd':
+          delete = 1;
+          break;
+
+        case 's':
+          squeeze_repeats = 1;
+          break;
+
+        case 't':
+          truncate_set1 = 1;
+          break;
+
+        default:
+          usage ();
+          break;
+        }
+    }
+
+  posix_pedantic = (getenv ("POSIXLY_CORRECT") != 0);
+
+  non_option_args = argc - optind;
+  translating = (non_option_args == 2 && !delete);
+
+  /* Change this test if it is legal to give tr no options and
+     no args at all.  POSIX doesn't specifically say anything
+     either way, but it looks like they implied it's illegal
+     by omission.  If you want to make tr do a slow imitation
+     of `cat' use `tr a a'.  */
+  if (non_option_args > 2)
+    usage ();
+
+  if (!delete && !squeeze_repeats && non_option_args != 2)
+    error (1, 0, "two strings must be given when translating");
+
+  if (delete && squeeze_repeats && non_option_args != 2)
+    error (1, 0, "two strings must be given when both \
+deleting and squeezing repeats");
+
+  /* If --delete is given without --squeeze-repeats, then
+     only one string argument may be specified.  But POSIX
+     says to ignore any string2 in this case, so if POSIXLY_CORRECT
+     is set, pretend we never saw string2.  But I think
+     this deserves a fatal error, so that's the default.  */
+  if ((delete && !squeeze_repeats) && non_option_args != 1)
+    {
+      if (posix_pedantic && non_option_args == 2)
+        --non_option_args;
+      else
+        error (1, 0,
+               "only one string may be given when deleting without squeezing repeats");
+    }
+
+  /* Every operation needs at least string1.  The tests above let
+     `tr -s' with no operand through, and argv[optind] is then NULL,
+     so reject that case before string1 is parsed.  */
+  if (non_option_args < 1)
+    usage ();
+
+  spec_init (s1);
+  if (parse_str ((unsigned char *) argv[optind], s1))
+    exit (1);
+
+  if (non_option_args == 2)
+    {
+      spec_init (s2);
+      if (parse_str ((unsigned char *) argv[optind + 1], s2))
+        exit (1);
+    }
+  else
+    s2 = NULL;
+
+  validate (s1, s2);
+
+  if (squeeze_repeats && non_option_args == 1)
+    {
+      /* Squeeze only: string1 is the squeeze set.  */
+      set_initialize (s1, complement, in_squeeze_set);
+      squeeze_filter (io_buf, IO_BUF_SIZE, NULL);
+    }
+  else if (delete && non_option_args == 1)
+    {
+      /* Delete only: string1 is the delete set.  */
+      int nr;
+
+      set_initialize (s1, complement, in_delete_set);
+      do
+        {
+          nr = read_and_delete (io_buf, IO_BUF_SIZE, NULL);
+          if (nr > 0 && fwrite ((char *) io_buf, 1, nr, stdout) == 0)
+            error (1, errno, "write error");
+        }
+      while (nr > 0);
+    }
+  else if (squeeze_repeats && delete && non_option_args == 2)
+    {
+      /* Delete with string1, then squeeze with string2.  */
+      set_initialize (s1, complement, in_delete_set);
+      set_initialize (s2, 0, in_squeeze_set);
+      squeeze_filter (io_buf, IO_BUF_SIZE, (PFI) read_and_delete);
+    }
+  else if (translating)
+    {
+      if (complement)
+        {
+          /* Map each character NOT in string1, in increasing order of
+             character code, to successive characters of string2.  The
+             in_delete_set array is borrowed as scratch space.  */
+          int i;
+          SET_TYPE *in_s1 = in_delete_set;
+
+          set_initialize (s1, 0, in_s1);
+          s2->state = BEGIN_STATE;
+          for (i = 0; i < N_CHARS; i++)
+            xlate[i] = i;
+          for (i = 0; i < N_CHARS; i++)
+            {
+              if (!in_s1[i])
+                {
+                  int c = get_next (s2, NULL);
+                  assert (c != -1 || truncate_set1);
+                  if (c == -1)
+                    {
+                      /* This will happen when tr is invoked like e.g.
+                         tr -cs A-Za-z0-9 '\012'.  */
+                      break;
+                    }
+                  xlate[i] = c;
+                }
+            }
+          assert (get_next (s2, NULL) == -1 || truncate_set1);
+        }
+      else
+        {
+          /* Walk string1 and string2 in step, mapping the Nth
+             character of string1 to the Nth character of string2.  */
+          int c1, c2;
+          int i;
+          enum Upper_Lower_class class_s1;
+          enum Upper_Lower_class class_s2;
+
+          for (i = 0; i < N_CHARS; i++)
+            xlate[i] = i;
+          s1->state = BEGIN_STATE;
+          s2->state = BEGIN_STATE;
+          for (;;)
+            {
+              c1 = get_next (s1, &class_s1);
+              c2 = get_next (s2, &class_s2);
+              if (!class_ok[(int) class_s1][(int) class_s2])
+                error (1, 0,
+                       "misaligned or mismatched upper and/or lower classes");
+              /* Stop as soon as either string is exhausted.  validate
+                 never shortens string2, so it may be longer than
+                 string1; without the test on c1 the assignment below
+                 would then store through xlate[-1].  */
+              if (c1 == -1 || c2 == -1)
+                break;
+              xlate[c1] = c2;
+            }
+          assert (c1 == -1 || truncate_set1);
+        }
+      if (squeeze_repeats)
+        {
+          set_initialize (s2, 0, in_squeeze_set);
+          squeeze_filter (io_buf, IO_BUF_SIZE, (PFI) read_and_xlate);
+        }
+      else
+        {
+          int chars_read;
+
+          do
+            {
+              chars_read = read_and_xlate (io_buf, IO_BUF_SIZE, NULL);
+              if (chars_read > 0
+                  && fwrite ((char *) io_buf, 1, chars_read, stdout) == 0)
+                error (1, errno, "write error");
+            }
+          while (chars_read > 0);
+        }
+    }
+
+  exit (0);
+}
+
diff --git a/src/unexpand.c b/src/unexpand.c
new file mode 100644
index 000000000..2733ef77a
--- /dev/null
+++ b/src/unexpand.c
@@ -0,0 +1,432 @@
+/* unexpand - convert spaces to tabs
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* By default, convert only maximal strings of initial blanks and tabs
+ into tabs.
+ Preserves backspace characters in the output; they decrement the
+ column count for tab calculations.
+ The default action is equivalent to -8.
+
+ Options:
+ --tabs=tab1[,tab2[,...]]
+ -t tab1[,tab2[,...]]
+ -tab1[,tab2[,...]] If only one tab stop is given, set the tabs tab1
+ spaces apart instead of the default 8. Otherwise,
+ set the tabs at columns tab1, tab2, etc. (numbered from
+ 0); replace any tabs beyond the tabstops given with
+ single spaces.
+ --all
+ -a Use tabs wherever they would replace 2 or more spaces,
+ not just at the beginnings of lines.
+
+ David MacKenzie <djm@ai.mit.edu> */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+#ifdef isascii
+#define ISDIGIT(c) (isascii((c)) && isdigit((c)))
+#else
+#define ISDIGIT(c) (isdigit((c)))
+#endif
+
+/* The number of bytes added at a time to the amount of memory
+ allocated for the output line. */
+#define OUTPUT_BLOCK 256
+
+/* The number of bytes added at a time to the amount of memory
+ allocated for the list of tabstops. */
+#define TABLIST_BLOCK 256
+
+char *xmalloc ();
+char *xrealloc ();
+void error ();
+
+FILE *next_file ();
+void add_tabstop ();
+void parse_tabstops ();
+void unexpand ();
+void usage ();
+void validate_tabstops ();
+
+/* If nonzero, convert blanks even after nonblank characters have been
+ read on the line. */
+int convert_entire_line;
+
+/* If nonzero, the size of all tab stops. If zero, use `tab_list' instead. */
+int tab_size;
+
+/* Array of the explicit column numbers of the tab stops;
+ after `tab_list' is exhausted, the rest of the line is printed
+ unchanged. The first column is column 0. */
+int *tab_list;
+
+/* The index of the first invalid element of `tab_list',
+ where the next element can be added. */
+int first_free_tab;
+
+/* Null-terminated array of input filenames. */
+char **file_list;
+
+/* Default for `file_list' if no files are given on the command line. */
+char *stdin_argv[] =
+{
+ "-", NULL
+};
+
+/* Nonzero if we have ever read standard input. */
+int have_read_stdin;
+
+/* Status to return to the system. */
+int exit_status;
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Long option table for getopt_long. Each entry maps a long name to
+ the short option character in its last field; a second field of 1
+ means the option takes an argument. The all-zero entry ends the
+ table. */
+struct option longopts[] =
+{
+ {"tabs", 1, NULL, 't'},
+ {"all", 0, NULL, 'a'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Entry point of unexpand.  Parse the options -- including the
+   old-style tab list given as bare digits and commas -- then choose
+   between a fixed tab size and an explicit tab list, pick the input
+   files (standard input when none are named), convert them, and exit
+   with `exit_status'.  */
+
+void
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;                        /* Option character.  */
+  int tabval = -1;              /* Tab stop being accumulated, or -1.  */
+
+  program_name = argv[0];
+  first_free_tab = 0;
+  tab_list = NULL;
+  convert_entire_line = 0;
+  exit_status = 0;
+  have_read_stdin = 0;
+
+  for (;;)
+    {
+      c = getopt_long (argc, argv, "at:,0123456789", longopts, (int *) 0);
+      if (c == EOF)
+        break;
+
+      if (c == ',')
+        {
+          /* End of one old-style tab stop.  */
+          add_tabstop (tabval);
+          tabval = -1;
+        }
+      else if (c == 't')
+        {
+          convert_entire_line = 1;
+          parse_tabstops (optarg);
+        }
+      else if (c == 'a' || c == '?')
+        {
+          if (c == '?')
+            usage ();
+          convert_entire_line = 1;
+        }
+      else
+        {
+          /* Any other option character is a digit of an old-style
+             tab stop.  */
+          if (tabval == -1)
+            tabval = 0;
+          tabval = tabval * 10 + c - '0';
+        }
+    }
+
+  /* Store a tab stop that was still being read when options ended.  */
+  add_tabstop (tabval);
+
+  validate_tabstops (tab_list, first_free_tab);
+
+  switch (first_free_tab)
+    {
+    case 0:
+      tab_size = 8;
+      break;
+    case 1:
+      tab_size = tab_list[0];
+      break;
+    default:
+      tab_size = 0;
+      break;
+    }
+
+  file_list = (optind == argc) ? stdin_argv : &argv[optind];
+
+  unexpand ();
+
+  if (have_read_stdin && fclose (stdin) == EOF)
+    error (1, errno, "-");
+  if (fclose (stdout) == EOF)
+    error (1, errno, "write error");
+  exit (exit_status);
+}
+
+/* Parse STOPS, a list of decimal tab stops separated by commas or
+   blanks, and append each one to the list of tabstops with
+   add_tabstop.  Any other character is a fatal error.  */
+
+void
+parse_tabstops (stops)
+     char *stops;
+{
+  char *p = stops;
+  int value = -1;               /* Number being read, or -1 for none.  */
+
+  while (*p != '\0')
+    {
+      if (ISDIGIT (*p))
+        {
+          if (value == -1)
+            value = 0;
+          value = value * 10 + *p - '0';
+        }
+      else if (*p == ',' || isblank (*p))
+        {
+          /* A separator ends the current number, if there is one.  */
+          add_tabstop (value);
+          value = -1;
+        }
+      else
+        error (1, 0, "tab size contains an invalid character");
+
+      p++;
+    }
+
+  /* Store the last number, which has no separator after it.  */
+  add_tabstop (value);
+}
+
+/* Add tab stop TABVAL to the end of `tab_list', except
+   if TABVAL is -1, do nothing.  `tab_list' is grown by
+   TABLIST_BLOCK elements whenever it is full.  */
+
+void
+add_tabstop (tabval)
+     int tabval;
+{
+  if (tabval == -1)
+    return;
+
+  /* first_free_tab is a multiple of TABLIST_BLOCK exactly when every
+     allocated slot is in use (including the initial empty list).
+     The size handed to xrealloc is in bytes, so the element count
+     must be scaled by the element size; otherwise the list overruns
+     its allocation long before TABLIST_BLOCK stops have been added.
+     The cast keeps the argument an `int', because xrealloc is
+     declared here without a prototype.  */
+  if (first_free_tab % TABLIST_BLOCK == 0)
+    tab_list = (int *) xrealloc (tab_list,
+                                 (first_free_tab + TABLIST_BLOCK)
+                                 * (int) sizeof (tab_list[0]));
+  tab_list[first_free_tab++] = tabval;
+}
+
+/* Verify that the ENTRIES tab stops in TABS are all nonzero and
+   strictly ascending; exit with an error message at the first one
+   that is not.  */
+
+void
+validate_tabstops (tabs, entries)
+     int *tabs;
+     int entries;
+{
+  int previous = 0;
+  int *stop = tabs;
+  int remaining;
+
+  for (remaining = entries; remaining > 0; remaining--)
+    {
+      if (*stop == 0)
+        error (1, 0, "tab size cannot be 0");
+      if (*stop <= previous)
+        error (1, 0, "tab sizes must be ascending");
+      previous = *stop;
+      stop++;
+    }
+}
+
+/* Change spaces to tabs, writing to stdout.
+   Read each file in `file_list', in order.
+
+   Blanks are not written as they are read.  Instead the number of
+   columns they cover is accumulated in `pending'; when a non-blank
+   character arrives, the pending columns are written as as many tabs
+   as fit, followed by spaces for the remainder.  A single pending
+   column is always written as a space.  */
+
+void
+unexpand ()
+{
+  FILE *fp;                     /* Input stream.  */
+  int c;                        /* Each input character.  */
+  /* Index in `tab_list' of next tabstop:  */
+  int tab_index = 0;            /* For calculating width of pending tabs.  */
+  int print_tab_index = 0;      /* For printing as many tabs as possible.  */
+  int column = 0;               /* Column on screen of next char.  */
+  int next_tab_column;          /* Column the next tab stop is on.  */
+  int convert = 1;              /* If nonzero, perform translations.  */
+  int pending = 0;              /* Pending columns of blanks.  */
+
+  fp = next_file ((FILE *) NULL);
+  /* None of the input files could be opened; next_file has already
+     reported each failure, and there is no stream to read from.  */
+  if (fp == NULL)
+    return;
+
+  for (;;)
+    {
+      c = getc (fp);
+      if (c == EOF)
+        {
+          fp = next_file (fp);
+          if (fp == NULL)
+            break;              /* No more files.  */
+          else
+            continue;
+        }
+
+      if (c == ' ' && convert)
+        {
+          ++pending;
+          ++column;
+        }
+      else if (c == '\t' && convert)
+        {
+          if (tab_size == 0)
+            {
+              /* Do not let tab_index == first_free_tab;
+                 stop when it is 1 less.  */
+              while (tab_index < first_free_tab - 1
+                     && column >= tab_list[tab_index])
+                tab_index++;
+              next_tab_column = tab_list[tab_index];
+              if (tab_index < first_free_tab - 1)
+                tab_index++;
+              if (column >= next_tab_column)
+                {
+                  convert = 0;  /* Ran out of tab stops.  */
+                  goto flush_pend;
+                }
+            }
+          else
+            {
+              next_tab_column = column + tab_size - column % tab_size;
+            }
+          pending += next_tab_column - column;
+          column = next_tab_column;
+        }
+      else
+        {
+        flush_pend:
+          /* Flush pending spaces.  Print as many tabs as possible,
+             then print the rest as spaces.  */
+          if (pending == 1)
+            {
+              putchar (' ');
+              pending = 0;
+            }
+          /* Rewind to the column where the pending blanks began.  */
+          column -= pending;
+          while (pending != 0)
+            {
+              if (tab_size == 0)
+                {
+                  /* Do not let print_tab_index == first_free_tab;
+                     stop when it is 1 less.  The loop must test the
+                     index it advances: testing tab_index here while
+                     incrementing print_tab_index never terminates
+                     once the condition is true.  */
+                  while (print_tab_index < first_free_tab - 1
+                         && column >= tab_list[print_tab_index])
+                    print_tab_index++;
+                  next_tab_column = tab_list[print_tab_index];
+                  if (print_tab_index < first_free_tab - 1)
+                    print_tab_index++;
+                }
+              else
+                {
+                  next_tab_column = column + tab_size - column % tab_size;
+                }
+              if (next_tab_column - column <= pending)
+                {
+                  putchar ('\t');
+                  pending -= next_tab_column - column;
+                  column = next_tab_column;
+                }
+              else
+                {
+                  /* The next tab stop is beyond the pending blanks:
+                     step the index back and emit plain spaces.  */
+                  --print_tab_index;
+                  column += pending;
+                  while (pending != 0)
+                    {
+                      putchar (' ');
+                      pending--;
+                    }
+                }
+            }
+
+          if (convert)
+            {
+              if (c == '\b')
+                {
+                  if (column > 0)
+                    --column;
+                }
+              else
+                {
+                  ++column;
+                  /* Without --all, only leading blanks are converted.  */
+                  if (convert_entire_line == 0)
+                    convert = 0;
+                }
+            }
+
+          putchar (c);
+
+          if (c == '\n')
+            {
+              tab_index = print_tab_index = 0;
+              column = pending = 0;
+              convert = 1;
+            }
+        }
+    }
+}
+
+/* Finish with stream FP, if it is non-NULL, and return a stream for
+   the next name in `file_list' that can be opened for reading.  A
+   name of `-' stands for standard input.  Errors on the old stream
+   and names that cannot be opened are reported and set `exit_status'
+   to 1.  Return NULL when `file_list' is exhausted.  */
+
+FILE *
+next_file (fp)
+     FILE *fp;
+{
+  static char *prev_file;
+  char *name;
+
+  if (fp != NULL)
+    {
+      if (ferror (fp))
+        {
+          error (0, errno, "%s", prev_file);
+          exit_status = 1;
+        }
+
+      if (fp != stdin)
+        {
+          if (fclose (fp) == EOF)
+            {
+              error (0, errno, "%s", prev_file);
+              exit_status = 1;
+            }
+        }
+      else
+        clearerr (fp);          /* Also clear EOF.  */
+    }
+
+  for (;;)
+    {
+      name = *file_list++;
+      if (name == NULL)
+        return NULL;
+
+      if (name[0] == '-' && name[1] == '\0')
+        {
+          have_read_stdin = 1;
+          prev_file = name;
+          return stdin;
+        }
+
+      fp = fopen (name, "r");
+      if (fp != NULL)
+        {
+          prev_file = name;
+          return fp;
+        }
+
+      /* Could not open this one; report it and try the next name.  */
+      error (0, errno, "%s", name);
+      exit_status = 1;
+    }
+}
+
+/* Print a summary of the command line syntax, naming the program as
+ `program_name', on stderr and exit with status 1. */
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-tab1[,tab2[,...]]] [-t tab1[,tab2[,...]]] [-a]\n\
+ [--tabs=tab1[,tab2[,...]]] [--all] [file...]\n",
+ program_name);
+ exit (1);
+}
diff --git a/src/uniq.c b/src/uniq.c
new file mode 100644
index 000000000..0968cbae4
--- /dev/null
+++ b/src/uniq.c
@@ -0,0 +1,321 @@
+/* uniq -- remove duplicate lines from a sorted file
+ Copyright (C) 1986, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Richard Stallman and David MacKenzie. */
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#ifndef isblank
+#define isblank(c) ((c) == ' ' || (c) == '\t')
+#endif
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+#include "linebuffer.h"
+
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+char *find_field ();
+int different ();
+void check_file ();
+void error ();
+void usage ();
+void writeline ();
+
+/* Number of fields to skip on each line when doing comparisons. */
+int skip_fields;
+
+/* Number of chars to skip after skipping any fields. */
+int skip_chars;
+
+/* Number of chars to compare; if 0, compare the whole lines. */
+int check_chars;
+
+enum countmode
+{
+ count_occurrences, /* -c Print count before output lines. */
+ count_none /* Default. Do not print counts. */
+};
+
+/* Whether and how to precede the output lines with a count of the number of
+ times they occurred in the input. */
+enum countmode countmode;
+
+enum output_mode
+{
+ output_repeated, /* -d Only lines that are repeated. */
+ output_unique, /* -u Only lines that are not repeated. */
+ output_all /* Default. Print first copy of each line. */
+};
+
+/* Which lines to output. */
+enum output_mode mode;
+
+/* The name this program was run with. */
+char *program_name;
+
+/* Long option table for getopt_long. Each entry maps a long name to
+ the short option character in its last field; a second field of 1
+ means the option takes an argument. The all-zero entry ends the
+ table. */
+struct option longopts[] =
+{
+ {"count", 0, NULL, 'c'},
+ {"repeated", 0, NULL, 'd'},
+ {"unique", 0, NULL, 'u'},
+ {"skip-fields", 1, NULL, 'f'},
+ {"skip-chars", 1, NULL, 's'},
+ {"check-chars", 1, NULL, 'w'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Entry point of uniq.  Parse the options, including the old-style
+   `-N' (skip N fields) and `+N' (skip N chars) forms, take the
+   optional input and output file names (each defaulting to "-"),
+   and filter the input with check_file.  Exits with status 0.  */
+
+void
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  char *infile = "-";
+  char *outfile = "-";
+  int optc;
+
+  program_name = argv[0];
+  countmode = count_none;
+  mode = output_all;
+  check_chars = 0;
+  skip_fields = 0;
+  skip_chars = 0;
+
+  for (;;)
+    {
+      optc = getopt_long (argc, argv, "0123456789cdf:s:uw:", longopts,
+                          (int *) 0);
+      if (optc == EOF)
+        break;
+
+      /* Old-style `-N': each digit arrives as its own option and
+         extends the field count.  */
+      if (optc >= '0' && optc <= '9')
+        {
+          skip_fields = skip_fields * 10 + optc - '0';
+          continue;
+        }
+
+      switch (optc)
+        {
+        case 'c':
+          countmode = count_occurrences;
+          break;
+
+        case 'd':
+          mode = output_repeated;
+          break;
+
+        case 'u':
+          mode = output_unique;
+          break;
+
+        case 'f':               /* Like '-#'.  */
+          skip_fields = atoi (optarg);
+          break;
+
+        case 's':               /* Like '+#'.  */
+          skip_chars = atoi (optarg);
+          break;
+
+        case 'w':
+          check_chars = atoi (optarg);
+          break;
+
+        default:
+          usage ();
+        }
+    }
+
+  /* Old-style `+N' operands come after the options.  */
+  while (optind < argc && argv[optind][0] == '+')
+    {
+      skip_chars = atoi (argv[optind]);
+      optind++;
+    }
+
+  if (optind < argc)
+    {
+      infile = argv[optind];
+      optind++;
+    }
+
+  if (optind < argc)
+    {
+      outfile = argv[optind];
+      optind++;
+    }
+
+  if (optind < argc)
+    usage ();                   /* Extra arguments.  */
+
+  check_file (infile, outfile);
+
+  exit (0);
+}
+
+/* Filter INFILE into OUTFILE, either of which may be "-" to mean the
+   corresponding standard stream.  Consecutive lines whose compared
+   fields match are collapsed into the first line of the group, which
+   is handed to writeline together with the number of extra copies
+   seen.  Failure to open, read or write a file is fatal.  */
+
+void
+check_file (infile, outfile)
+     char *infile, *outfile;
+{
+  FILE *istream;
+  FILE *ostream;
+  struct linebuffer lb1, lb2;
+  struct linebuffer *thisline = &lb1;
+  struct linebuffer *prevline = &lb2;
+  struct linebuffer *swap;
+  char *prevfield, *thisfield;
+  int prevlen, thislen;
+  int match_count = 0;
+
+  /* Open the input first so that a bad input name is diagnosed
+     before the output file is created.  */
+  istream = strcmp (infile, "-") == 0 ? stdin : fopen (infile, "r");
+  if (istream == NULL)
+    error (1, errno, "%s", infile);
+
+  ostream = strcmp (outfile, "-") == 0 ? stdout : fopen (outfile, "w");
+  if (ostream == NULL)
+    error (1, errno, "%s", outfile);
+
+  initbuffer (thisline);
+  initbuffer (prevline);
+
+  if (readline (prevline, istream) != 0)
+    {
+      prevfield = find_field (prevline);
+      prevlen = prevline->length - (prevfield - prevline->buffer);
+
+      for (;;)
+        {
+          if (feof (istream))
+            break;
+          if (readline (thisline, istream) == 0)
+            break;
+
+          thisfield = find_field (thisline);
+          thislen = thisline->length - (thisfield - thisline->buffer);
+
+          if (different (thisfield, prevfield, thislen, prevlen))
+            {
+              /* A new group starts: emit the finished one, then let
+                 the line just read become the group's first line by
+                 exchanging the two buffers.  */
+              writeline (prevline, ostream, match_count);
+              match_count = 0;
+
+              swap = prevline;
+              prevline = thisline;
+              thisline = swap;
+              prevfield = thisfield;
+              prevlen = thislen;
+            }
+          else
+            match_count++;
+        }
+
+      /* Emit the last group.  */
+      writeline (prevline, ostream, match_count);
+    }
+
+  if (ferror (istream) || fclose (istream) == EOF)
+    error (1, errno, "error reading %s", infile);
+
+  if (ferror (ostream) || fclose (ostream) == EOF)
+    error (1, errno, "error writing %s", outfile);
+
+  free (lb1.buffer);
+  free (lb2.buffer);
+}
+
+/* Return a pointer into the buffer of LINE at the place where the
+   comparison should begin: after `skip_fields' fields (a field being
+   optional blanks followed by a run of non-blanks) and then after
+   `skip_chars' further characters.  The result never goes past the
+   end of the line.  */
+
+char *
+find_field (line)
+     struct linebuffer *line;
+{
+  char *p = line->buffer;
+  char *end = p + line->length;
+  int n;
+
+  for (n = skip_fields; n > 0 && p < end; n--)
+    {
+      /* Blanks in front of the field, then the field itself.  */
+      while (p < end && isblank (*p))
+        p++;
+      while (p < end && !isblank (*p))
+        p++;
+    }
+
+  for (n = skip_chars; n > 0 && p < end; n--)
+    p++;
+
+  return p;
+}
+
+/* Compare the fields OLD (OLDLEN bytes) and NEW (NEWLEN bytes), which
+   point at the parts of two lines that are to be compared rather than
+   at the starts of the lines.  When `check_chars' is nonzero, at most
+   that many bytes of each field take part.  Return zero if the fields
+   match and nonzero if they do not.  */
+
+int
+different (old, new, oldlen, newlen)
+     char *old;
+     char *new;
+     int oldlen;
+     int newlen;
+{
+  int order;
+  int common;
+
+  if (check_chars != 0)
+    {
+      oldlen = (oldlen > check_chars) ? check_chars : oldlen;
+      newlen = (newlen > check_chars) ? check_chars : newlen;
+    }
+
+  /* Compare the part both fields have; if that part is equal, the
+     fields match only when they are equally long.  */
+  common = (oldlen < newlen) ? oldlen : newlen;
+  order = memcmp (old, new, common);
+  return (order != 0) ? order : oldlen - newlen;
+}
+
+/* Write the line held in LINE, followed by a newline, to STREAM,
+   unless the output mode says it should be suppressed: with
+   output_unique only lines that were never repeated are written, and
+   with output_repeated only lines that were.  LINECOUNT is the number
+   of extra copies seen, so the line occurred LINECOUNT + 1 times; that
+   total is printed in front of the line when counts are requested.  */
+
+void
+writeline (line, stream, linecount)
+     struct linebuffer *line;
+     FILE *stream;
+     int linecount;
+{
+  switch (mode)
+    {
+    case output_unique:
+      if (linecount != 0)
+        return;
+      break;
+
+    case output_repeated:
+      if (linecount == 0)
+        return;
+      break;
+
+    default:
+      break;
+    }
+
+  if (countmode == count_occurrences)
+    fprintf (stream, "%7d\t", linecount + 1);
+
+  fwrite (line->buffer, sizeof (char), line->length, stream);
+  putc ('\n', stream);
+}
+
+/* Print a summary of the command line syntax, naming the program as
+ `program_name', on stderr and exit with status 1. */
+void
+usage ()
+{
+ fprintf (stderr, "\
+Usage: %s [-cdu] [-f skip-fields] [-s skip-chars] [-w check-chars]\n\
+ [-#skip-fields] [+#skip-chars] [--count] [--repeated] [--unique]\n\
+ [--skip-fields=skip-fields] [--skip-chars=skip-chars]\n\
+ [--check-chars=check-chars] [infile] [outfile]\n",
+ program_name);
+ exit (1);
+}
diff --git a/src/wc.c b/src/wc.c
new file mode 100644
index 000000000..72d6ea665
--- /dev/null
+++ b/src/wc.c
@@ -0,0 +1,231 @@
+/* wc - print the number of bytes, words, and lines in files
+ Copyright (C) 1985, 1991 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by Paul Rubin, phr@ocf.berkeley.edu
+ and David MacKenzie, djm@gnu.ai.mit.edu. */
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+
+/* Number of bytes requested by each read call in wc; also the size of
+   the buffer that wc reads into. */
+#define BUFFER_SIZE (16 * 1024)
+
+/* error is not defined in this file; the other three functions are
+   defined below. */
+void error ();
+void wc ();
+void wc_file ();
+void write_counts ();
+
+/* Cumulative number of lines, words, and chars in all files so far.
+   wc adds each input's counts to these; main prints them on a final
+   "total" line when more than one file operand was given. */
+unsigned long total_lines, total_words, total_chars;
+
+/* Which counts to print.  Each is set to 1 by the matching option;
+   if no option selects a count, main enables all three. */
+int print_lines, print_words, print_chars;
+
+/* Nonzero if we have ever read the standard input.  When it is set,
+   main closes file descriptor 0 before exiting. */
+int have_read_stdin;
+
+/* The name this program was run with. */
+char *program_name;
+
+/* The error code to return to the system.  Set to 1 when opening,
+   reading, or closing an input fails. */
+int exit_status;
+
+/* Long options handed to getopt_long in main.  Each entry maps to the
+   short option letter in its last field; --bytes and --chars both
+   map to 'c'. */
+struct option longopts[] =
+{
+ {"bytes", 0, NULL, 'c'},
+ {"chars", 0, NULL, 'c'},
+ {"lines", 0, NULL, 'l'},
+ {"words", 0, NULL, 'w'},
+ {NULL, 0, NULL, 0}
+};
+
+/* Entry point.  Parse the options, then count each file operand in
+   turn, or the standard input when there are no operands.  When more
+   than one operand was given, finish with a "total" line holding the
+   accumulated counts.
+
+   If no option selects a count, lines, words and chars are all
+   printed.  An unrecognized option prints a usage message on stderr
+   and exits with status 1.  Otherwise the program exits with
+   exit_status, which wc and wc_file set to 1 when they report an
+   error.
+
+   main is declared to return int, the return type required of a
+   hosted C program; control never reaches the closing brace because
+   exit is called first. */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int optc;
+  int nfiles;
+
+  program_name = argv[0];
+  exit_status = 0;
+  print_lines = print_words = print_chars = 0;
+  total_lines = total_words = total_chars = 0;
+
+  while ((optc = getopt_long (argc, argv, "clw", longopts, (int *) 0)) != EOF)
+    switch (optc)
+      {
+      case 'c':
+        print_chars = 1;
+        break;
+
+      case 'l':
+        print_lines = 1;
+        break;
+
+      case 'w':
+        print_words = 1;
+        break;
+
+      default:
+        fprintf (stderr, "\
+Usage: %s [-clw] [--bytes] [--chars] [--lines] [--words] [file...]\n", argv[0]);
+        exit (1);
+      }
+
+  /* With no selection at all, print every count. */
+  if (print_lines + print_words + print_chars == 0)
+    print_lines = print_words = print_chars = 1;
+
+  nfiles = argc - optind;
+
+  if (nfiles == 0)
+    {
+      /* No operands: count the standard input.  The empty name makes
+         write_counts omit the file name column. */
+      have_read_stdin = 1;
+      wc (0, "");
+    }
+  else
+    {
+      for (; optind < argc; ++optind)
+        wc_file (argv[optind]);
+
+      if (nfiles > 1)
+        write_counts (total_lines, total_words, total_chars, "total");
+    }
+
+  /* Standard input is closed once, here, rather than in wc_file,
+     because "-" may have been given more than once. */
+  if (have_read_stdin && close (0))
+    error (1, errno, "-");
+
+  exit (exit_status);
+}
+
+/* Count the input named FILE and print its counts.  The name "-"
+   stands for the standard input, which is counted through file
+   descriptor 0 and left open.  Any other name is opened read-only,
+   counted, and closed again.  A failure to open or to close the file
+   is reported through error and recorded by setting exit_status to 1;
+   after a failed open nothing is counted or printed for FILE. */
+
+void
+wc_file (file)
+     char *file;
+{
+  int fd;
+
+  if (strcmp (file, "-") == 0)
+    {
+      have_read_stdin = 1;
+      wc (0, file);
+      return;
+    }
+
+  fd = open (file, O_RDONLY);
+  if (fd == -1)
+    {
+      error (0, errno, "%s", file);
+      exit_status = 1;
+      return;
+    }
+
+  wc (fd, file);
+
+  if (close (fd) != 0)
+    {
+      error (0, errno, "%s", file);
+      exit_status = 1;
+    }
+}
+
+/* Count the lines, words and chars readable from file descriptor FD,
+   print them with write_counts under the name FILE, and add them to
+   the running totals.
+
+   A line is counted for every newline character.  A word is a maximal
+   run of characters other than newline, carriage return, form feed,
+   tab, vertical tab and space.  A read error is reported through
+   error and sets exit_status to 1; the counts gathered up to that
+   point are still printed and added to the totals. */
+
+void
+wc (fd, file)
+     int fd;
+     char *file;
+{
+  char buf[BUFFER_SIZE];
+  register int bytes_read;
+  register int in_word = 0;
+  register unsigned long lines, words, chars;
+  struct stat stats;
+  off_t current_pos;
+
+  lines = words = chars = 0;
+
+  /* When only the char count is wanted and FD is a regular file, the
+     count can be derived from the file size without reading any data.
+     FD may already be positioned past the start of the file (for
+     example an inherited standard input that has been partly
+     consumed), so count only what lies between the current offset and
+     the end of the file.  An offset at or beyond the end leaves the
+     count at zero.  If the offset cannot be obtained, fall back to
+     reading. */
+  if (print_chars && !print_words && !print_lines
+      && fstat (fd, &stats) == 0 && S_ISREG (stats.st_mode)
+      && (current_pos = lseek (fd, (off_t) 0, SEEK_CUR)) != -1)
+    {
+      if (current_pos < stats.st_size)
+        chars = stats.st_size - current_pos;
+    }
+  else
+    {
+      while ((bytes_read = read (fd, buf, BUFFER_SIZE)) > 0)
+        {
+          register char *p = buf;
+
+          chars += bytes_read;
+          /* Scan the buffer, counting bytes_read down to zero.
+             in_word is declared outside the loop, so a word that
+             straddles two reads is counted once. */
+          do
+            {
+              switch (*p++)
+                {
+                case '\n':
+                  lines++;
+                  /* Fall through. */
+                case '\r':
+                case '\f':
+                case '\t':
+                case '\v':
+                case ' ':
+                  if (in_word)
+                    {
+                      in_word = 0;
+                      words++;
+                    }
+                  break;
+                default:
+                  in_word = 1;
+                  break;
+                }
+            }
+          while (--bytes_read);
+        }
+      /* Here bytes_read holds the result of the last read: zero at
+         end of input, negative after an error. */
+      if (bytes_read < 0)
+        {
+          error (0, errno, "%s", file);
+          exit_status = 1;
+        }
+      /* Count a final word that ran up to the end of the input. */
+      if (in_word)
+        words++;
+    }
+
+  write_counts (lines, words, chars, file);
+  total_lines += lines;
+  total_words += words;
+  total_chars += chars;
+}
+
+/* Print one line of output on stdout: the line count LC, the word
+   count WC and the char count CC, each only if its print_ flag is
+   set, each right-justified in a field of at least seven columns and
+   separated from the previous count by a single space.  If FILE is
+   not the empty string, it follows the counts after a space. */
+
+void
+write_counts (lc, wc, cc, file)
+     unsigned long lc, wc, cc;
+     char *file;
+{
+  /* Nonzero once a count has been printed on this line, meaning the
+     next count must be preceded by a space. */
+  int need_space = 0;
+
+  if (print_lines)
+    {
+      printf ("%7lu", lc);
+      need_space = 1;
+    }
+
+  if (print_words)
+    {
+      if (need_space)
+        putchar (' ');
+      printf ("%7lu", wc);
+      need_space = 1;
+    }
+
+  if (print_chars)
+    {
+      if (need_space)
+        putchar (' ');
+      printf ("%7lu", cc);
+    }
+
+  if (*file != '\0')
+    printf (" %s", file);
+
+  putchar ('\n');
+}