.

author: Jim Meyering <jim@meyering.net> 1998-08-14 14:09:05 +0000
committer: Jim Meyering <jim@meyering.net> 1998-08-14 14:09:05 +0000
commit: 7b0caffd312443449cb1f398ded7104794e1dc69 (patch)
tree: ceb02d7a74d884d8a081af4eb48f37b2e9479be1 /src
parent: ed2a7b4e5325503fdb3b4766b25ae9a42618a9e3 (diff)
download: coreutils-7b0caffd312443449cb1f398ded7104794e1dc69.tar.xz
1 files changed, 2219 insertions, 0 deletions
diff --git a/src/ptx.c b/src/ptx.c
new file mode 100644
index 000000000..7f5263aab
--- /dev/null
+++ b/src/ptx.c
@@ -0,0 +1,2219 @@
+/* Permuted index for GNU, with keywords in their context.
+   Copyright © 1990, 1991, 1993, 1998 Free Software Foundation, Inc.
+   François Pinard <pinard@iro.umontreal.ca>, 1988.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software Foundation,
+   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+   François Pinard <pinard@iro.umontreal.ca> */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <getopt.h>
+#include <sys/types.h>
+#include "system.h"
+#include "argmatch.h"
+#include "bumpalloc.h"
+#include "diacrit.h"
+#include "error.h"
+#include "regex.h"
+
+/* Number of possible characters in a byte.  */
+#define CHAR_SET_SIZE 256
+
+/* The ctype definitions should work for all 256 characters.  */
+#if STDC_HEADERS
+# include <ctype.h>
+#else
+# define isspace(C) ((C) == ' ' || (C) == '\t' || (C) == '\n')
+# define isxdigit(C) \
+  (((unsigned char) (C) >= 'a' && (unsigned char) (C) <= 'f')		\
+   || ((unsigned char) (C) >= 'A' && (unsigned char) (C) <= 'F')	\
+   || ((unsigned char) (C) >= '0' && (unsigned char) (C) <= '9'))
+# define islower(C) ((unsigned char) (C) >= 'a' && (unsigned char) (C) <= 'z')
+# define isupper(C) ((unsigned char) (C) >= 'A' && (unsigned char) (C) <= 'Z')
+# define isalpha(C) (islower (C) || isupper (C))
+# define toupper(C) (islower (C) ? (C) - 'a' + 'A' : (C))
+#endif
+
+#if !defined (isascii) || defined (STDC_HEADERS)
+# undef isascii
+# define isascii(C) 1
+#endif
+
+#ifndef ISXDIGIT
+# define ISXDIGIT(C) (isascii (C) && isxdigit (C))
+#endif
+#define ISODIGIT(C) ((C) >= '0' && (C) <= '7')
+#define HEXTOBIN(C) ((C) >= 'a' && (C) <= 'f' ? (C)-'a'+10 \
+		     : (C) >= 'A' && (C) <= 'F' ? (C)-'A'+10 : (C)-'0')
+#define OCTTOBIN(C) ((C) - '0')
+
+/* Debugging the memory allocator.  */
+
+#if WITH_DMALLOC
+# define MALLOC_FUNC_CHECK 1
+# include <dmalloc.h>
+#endif
+
+/* Global definitions.  */
+
+/* Reallocation step when swallowing non regular files.  The value is not
+   the actual reallocation step, but its base two logarithm.  */
+#define SWALLOW_REALLOC_LOG 12
+
+/* Imported from "regex.c".  */
+#define Sword 1
+
+/* The name this program was run with. */
+const char *program_name;
+
+/* If nonzero, display usage information and exit.  */
+static int show_help = 0;
+
+/* If nonzero, print the version on standard output and exit.  */
+static int show_version = 0;
+
+/* Program options.  */
+
+enum Format
+{
+  UNKNOWN_FORMAT,		/* output format still unknown */
+  DUMB_FORMAT,			/* output for a dumb terminal */
+  ROFF_FORMAT,			/* output for `troff' or `nroff' */
+  TEX_FORMAT			/* output for `TeX' or `LaTeX' */
+};
+
+int gnu_extensions = 1;		/* trigger all GNU extensions */
+int auto_reference = 0;		/* references are `file_name:line_number:' */
+int input_reference = 0;	/* references at beginning of input lines */
+int right_reference = 0;	/* output references after right context  */
+int line_width = 72;		/* output line width in characters */
+int gap_size = 3;		/* number of spaces between output fields */
+const char *truncation_string = "/";
+				/* string used to mark line truncations */
+const char *macro_name = "xx";	/* macro name for roff or TeX output */
+enum Format output_format = UNKNOWN_FORMAT;
+				/* output format */
+
+int ignore_case = 0;		/* fold lower to upper case for sorting */
+const char *context_regex_string = NULL;
+				/* raw regex for end of context */
+const char *word_regex_string = NULL;
+				/* raw regex for a keyword */
+const char *break_file = NULL;	/* name of the `Break characters' file */
+const char *only_file = NULL;	/* name of the `Only words' file */
+const char *ignore_file = NULL;	/* name of the `Ignore words' file */
+
+/* A BLOCK delimits a region in memory of arbitrary size, like the copy of a
+   whole file.  A WORD is something smaller, its length should fit in a
+   short integer.  A WORD_TABLE may contain several WORDs.  */
+
+typedef struct
+  {
+    char *start;		/* pointer to beginning of region */
+    char *end;			/* pointer to end + 1 of region */
+  }
+BLOCK;
+
+typedef struct
+  {
+    char *start;		/* pointer to beginning of region */
+    short size;			/* length of the region */
+  }
+WORD;
+
+typedef struct
+  {
+    WORD *start;		/* array of WORDs */
+    size_t length;		/* number of entries */
+  }
+WORD_TABLE;
+
+/* Pattern description tables.  */
+
+/* For each character, provide its folded equivalent.  */
+unsigned char folded_chars[CHAR_SET_SIZE];
+
+/* For each character, indicate if it is part of a word.  */
+char syntax_table[CHAR_SET_SIZE];
+char *re_syntax_table = syntax_table;
+
+/* Compiled regex for end of context.  */
+struct re_pattern_buffer *context_regex;
+
+/* End of context pattern register indices.  */
+struct re_registers context_regs;
+
+/* Compiled regex for a keyword.  */
+struct re_pattern_buffer *word_regex;
+
+/* Keyword pattern register indices.  */
+struct re_registers word_regs;
+
+/* A word characters fastmap is used only when no word regexp has been
+   provided.  A word is then made up of a sequence of one or more characters
+   allowed by the fastmap.  Contains !0 if character allowed in word.  Not
+   only is this faster in most cases, but it also simplifies the
+   implementation of the Break files.  */
+char word_fastmap[CHAR_SET_SIZE];
+
+/* Maximum length of any word read.  */
+int maximum_word_length;
+
+/* Maximum width of any reference used.  */
+int reference_max_width;
+
+
+/* Ignore and Only word tables.  */
+
+WORD_TABLE ignore_table;	/* table of words to ignore */
+WORD_TABLE only_table;		/* table of words to select */
+
+#define ALLOC_NEW_WORD(table) \
+  BUMP_ALLOC ((table)->start, (table)->length, 8, WORD)
+
+/* Source text table, and scanning macros.  */
+
+int number_input_files;		/* number of text input files */
+int total_line_count;		/* total number of lines seen so far */
+const char **input_file_name;	/* array of text input file names */
+int *file_line_count;		/* array of `total_line_count' values at end */
+
+BLOCK text_buffer;		/* file to study */
+char *text_buffer_maxend;	/* allocated end of text_buffer */
+
+/* White space scanning macros.  Each one expands to a single unbraced
+   `while' statement which moves CURSOR in place, so CURSOR has to be a
+   modifiable `char *' lvalue and the invocation has to be followed by a
+   semicolon.  CURSOR never goes beyond LIMIT (or before START).  Characters
+   are converted to `unsigned char' before being handed to `isspace': a
+   negative `char', as found in 8-bit text, is not a valid argument for the
+   <ctype.h> functions.
+
+   SKIP_NON_WHITE used only for getting or skipping the reference.  */
+
+#define SKIP_NON_WHITE(cursor, limit) \
+  while ((cursor) < (limit) && !isspace ((unsigned char) *(cursor)))	\
+    (cursor)++
+
+#define SKIP_WHITE(cursor, limit) \
+  while ((cursor) < (limit) && isspace ((unsigned char) *(cursor)))	\
+    (cursor)++
+
+#define SKIP_WHITE_BACKWARDS(cursor, start) \
+  while ((cursor) > (start) && isspace ((unsigned char) (cursor)[-1]))	\
+    (cursor)--
+
+/* Advance CURSOR, in place, over one item of text.  When a word regexp
+   string has been given, the item is whatever `word_regex' matches right at
+   CURSOR, or a single character if the match fails or is empty.  Otherwise,
+   when the character at CURSOR is allowed by `word_fastmap', the item is the
+   whole run of such characters, stopping at LIMIT; any other character is
+   skipped alone.
+
+   NOTE: this expands to a bare `if ... else' statement, not wrapped in
+   `do { } while (0)', so do not use it as the unbraced body of an `if'
+   which has an `else' of its own.  The character at CURSOR is examined
+   without first checking CURSOR against LIMIT.  */
+
+#define SKIP_SOMETHING(cursor, limit) \
+  if (word_regex_string)						\
+    {									\
+      int count;							\
+      count = re_match (word_regex, cursor, limit - cursor, 0, NULL);	\
+      cursor += count <= 0 ? 1 : count;					\
+    }									\
+  else if (word_fastmap[(unsigned char) *cursor])			\
+    while (cursor < limit && word_fastmap[(unsigned char) *cursor])	\
+      cursor++;								\
+  else									\
+    cursor++
+
+/* Occurrences table.
+
+   The `keyword' pointer provides the central word, which is surrounded
+   by a left context and a right context.  The `keyword' and `length'
+   field allow full 8-bit characters keys, even including NULs.  At other
+   places in this program, the name `keyafter' refers to the keyword
+   followed by its right context.
+
+   The left context does not extend, towards the beginning of the file,
+   further than a distance given by the `left' value.  This value is
+   relative to the keyword beginning, it is usually negative.  This
+   insures that, except for white space, we will never have to backward
+   scan the source text, when it is time to generate the final output
+   lines.
+
+   The right context, indirectly attainable through the keyword end, does
+   not extend, towards the end of the file, further than a distance given
+   by the `right' value.  This value is relative to the keyword
+   beginning, it is usually positive.
+
+   When automatic references are used, the `reference' value is the
+   overall line number in all input files read so far, in this case, it
+   is of type (int).  When input references are used, the `reference'
+   value indicates the distance between the keyword beginning and the
+   start of the reference field, it is of type (DELTA) and usually
+   negative.  */
+
+typedef short DELTA;		/* to hold displacement within one context */
+
+typedef struct
+  {
+    WORD key;			/* description of the keyword */
+    DELTA left;			/* distance to left context start */
+    DELTA right;		/* distance to right context end */
+    int reference;		/* reference descriptor */
+  }
+OCCURS;
+
+/* The various OCCURS tables are indexed by the language.  For the time
+   being, there is no such multiple language support.  */
+
+OCCURS *occurs_table[1];	/* all words retained from the read text */
+size_t number_of_occurs[1];	/* number of used slots in occurs_table */
+
+#define ALLOC_NEW_OCCURS(language) \
+  BUMP_ALLOC (occurs_table[language], number_of_occurs[language], 9, OCCURS)
+
+
+/* Communication among output routines.  */
+
+/* Indicate if special output processing is requested for each character.  */
+char edited_flag[CHAR_SET_SIZE];
+
+int half_line_width;		/* half of line width, reference excluded */
+int before_max_width;		/* maximum width of before field */
+int keyafter_max_width;		/* maximum width of keyword-and-after field */
+int truncation_string_length;	/* length of string used to flag truncation */
+
+/* When context is limited by lines, wraparound may happen on final output:
+   the `head' pointer gives access to some supplementary left context which
+   will be seen at the end of the output line, the `tail' pointer gives
+   access to some supplementary right context which will be seen at the
+   beginning of the output line. */
+
+BLOCK tail;			/* tail field */
+int tail_truncation;		/* flag truncation after the tail field */
+
+BLOCK before;			/* before field */
+int before_truncation;		/* flag truncation before the before field */
+
+BLOCK keyafter;			/* keyword-and-after field */
+int keyafter_truncation;	/* flag truncation after the keyafter field */
+
+BLOCK head;			/* head field */
+int head_truncation;		/* flag truncation before the head field */
+
+BLOCK reference;		/* reference field for input reference mode */
+
+/* Miscellaneous routines.  */
+
+/*------------------------------------------------------.
+| Duplicate string STRING, while evaluating \-escapes.  |
+`------------------------------------------------------*/
+
+/* Loosely adapted from GNU sh-utils printf.c code.  */
+
+/* Return a newly allocated copy of STRING, obtained through `xmalloc', in
+   which escape sequences have been evaluated:
+
+     \a \b \f \n \r \t \v   the usual C control characters
+     \0ooo                  character of octal value ooo (0 to 3 digits)
+     \xhhh                  character of hexadecimal value hhh (1 to 3
+                            digits); with no digit at all, `\x' is kept
+     \c                     discard the remainder of STRING
+
+   Any other escape is copied unchanged, backslash included, and so is a
+   lone backslash ending STRING.  Numeric values are stored into a single
+   `char'.  No escape yields more characters than it is made of, so the
+   result always fits in the `strlen (STRING) + 1' bytes allocated.  */
+
+static char *
+copy_unescaped_string (const char *string)
+{
+  char *result;                 /* allocated result */
+  char *cursor;                 /* cursor in result */
+  int value;                    /* value of \nnn escape */
+  int length;                   /* length of \nnn escape */
+
+  result = xmalloc (strlen (string) + 1);
+  cursor = result;
+
+  while (*string)
+    {
+      if (*string != '\\')
+        {
+          *cursor++ = *string++;
+          continue;
+        }
+
+      /* Step over the backslash, then dispatch on the escape letter.  */
+
+      string++;
+      switch (*string)
+        {
+        case '\0':
+          /* Lone backslash at the very end of STRING.  Keep it, but leave
+             `string' on the terminating NUL: stepping over it would make
+             the loop go on reading past the end of the argument, and
+             writing past the end of RESULT.  */
+          *cursor++ = '\\';
+          break;
+
+        case 'x':               /* \xhhh escape, 3 chars maximum */
+          value = 0;
+          for (length = 0, string++;
+               length < 3 && ISXDIGIT ((unsigned char) *string);
+               length++, string++)
+            value = value * 16 + HEXTOBIN (*string);
+          if (length == 0)
+            {
+              /* Not a single digit: this was not an escape after all.  */
+              *cursor++ = '\\';
+              *cursor++ = 'x';
+            }
+          else
+            *cursor++ = value;
+          break;
+
+        case '0':               /* \0ooo escape, 3 chars maximum */
+          value = 0;
+          for (length = 0, string++;
+               length < 3 && ISODIGIT (*string);
+               length++, string++)
+            value = value * 8 + OCTTOBIN (*string);
+          *cursor++ = value;
+          break;
+
+        case 'a':               /* alert */
+#if __STDC__
+          *cursor++ = '\a';
+#else
+          *cursor++ = 7;
+#endif
+          string++;
+          break;
+
+        case 'b':               /* backspace */
+          *cursor++ = '\b';
+          string++;
+          break;
+
+        case 'c':               /* cancel the rest of the output */
+          while (*string)
+            string++;
+          break;
+
+        case 'f':               /* form feed */
+          *cursor++ = '\f';
+          string++;
+          break;
+
+        case 'n':               /* new line */
+          *cursor++ = '\n';
+          string++;
+          break;
+
+        case 'r':               /* carriage return */
+          *cursor++ = '\r';
+          string++;
+          break;
+
+        case 't':               /* horizontal tab */
+          *cursor++ = '\t';
+          string++;
+          break;
+
+        case 'v':               /* vertical tab */
+#if __STDC__
+          *cursor++ = '\v';
+#else
+          *cursor++ = 11;
+#endif
+          string++;
+          break;
+
+        default:                /* unknown escape, copy it untouched */
+          *cursor++ = '\\';
+          *cursor++ = *string++;
+          break;
+        }
+    }
+
+  *cursor = '\0';
+  return result;
+}
+
+/*-------------------------------------------------------------------.
+| Compile the regex represented by STRING, diagnose and abort if any |
+| error.  Returns the compiled regex structure.			     |
+`-------------------------------------------------------------------*/
+
+/* STRING is the source text of the regexp, NUL terminated.  The returned
+   pattern buffer, its compiled code and its fastmap all come from
+   `xmalloc'.  Case folding through `folded_chars' is requested when
+   `ignore_case' is set.  A compilation error is reported through `error'
+   with EXIT_FAILURE.  */
+
+static struct re_pattern_buffer *
+alloc_and_compile_regex (const char *string)
+{
+  struct re_pattern_buffer *compiled;   /* structure being set up */
+  const char *diagnostic;               /* complaint from regex.c, if any */
+
+  /* Start from an all zero structure, then fill in what the compiler
+     expects to find.  */
+
+  compiled = (struct re_pattern_buffer *) xmalloc (sizeof *compiled);
+  memset (compiled, 0, sizeof *compiled);
+
+  compiled->buffer = NULL;
+  compiled->allocated = 0;
+  if (ignore_case)
+    compiled->translate = (char *) folded_chars;
+  else
+    compiled->translate = NULL;
+  compiled->fastmap = (char *) xmalloc ((size_t) CHAR_SET_SIZE);
+
+  diagnostic = re_compile_pattern (string, (int) strlen (string), compiled);
+  if (diagnostic)
+    error (EXIT_FAILURE, 0, _("%s (for regexp `%s')"), diagnostic, string);
+
+  /* `re_search' would build the fastmap on first use anyway, but
+     `re_match' would not, so get it done right now.  */
+
+  re_compile_fastmap (compiled);
+
+  /* Give back whatever the compiler allocated but did not use.  */
+
+  if (compiled->used < compiled->allocated)
+    {
+      compiled->buffer
+        = (unsigned char *) xrealloc (compiled->buffer,
+                                      (size_t) compiled->used);
+      compiled->allocated = compiled->used;
+    }
+
+  return compiled;
+}
+
+/*------------------------------------------------------------------------.
+| This will initialize various tables for pattern match and compiles some |
+| regexps.								  |
+`------------------------------------------------------------------------*/
+
+/* Reads the option variables `ignore_case', `gnu_extensions',
+   `input_reference', `break_file', `context_regex_string' and
+   `word_regex_string'.  Fills `syntax_table', fills `folded_chars' when
+   ignoring case, sets `context_regex' and `word_regex' when the matching
+   regexp string is not empty, and fills `word_fastmap' when there is
+   neither a word regexp nor a Break file.  An empty regexp string is
+   replaced by NULL.  */
+
+static void
+initialize_regex (void)
+{
+  int character;                /* character value */
+
+  /* Initialize the regex syntax table.  */
+
+  for (character = 0; character < CHAR_SET_SIZE; character++)
+    syntax_table[character] = isalpha (character) ? Sword : 0;
+
+  /* Initialize the case folding table.  */
+
+  if (ignore_case)
+    for (character = 0; character < CHAR_SET_SIZE; character++)
+      folded_chars[character] = toupper (character);
+
+  /* Unless the user already provided a description of the end of line or
+     end of sentence sequence, select an end of line sequence to compile.
+     If the user provided an empty definition, thus disabling end of line
+     or sentence feature, make it NULL to speed up tests.  If GNU
+     extensions are enabled, use end of sentence like in GNU emacs.  If
+     disabled, use end of lines.  */
+
+  if (context_regex_string)
+    {
+      if (!*context_regex_string)
+        context_regex_string = NULL;
+    }
+  else if (gnu_extensions && !input_reference)
+    context_regex_string = "[.?!][]\"')}]*\\($\\|\t\\|  \\)[ \t\n]*";
+  else
+    context_regex_string = "\n";
+
+  if (context_regex_string)
+    context_regex = alloc_and_compile_regex (context_regex_string);
+
+  /* An empty word regexp means the same as no word regexp at all.  Make it
+     NULL, just like an empty end of context regexp: SKIP_SOMETHING tests
+     the string only, and would otherwise hand the never compiled, NULL
+     `word_regex' to `re_match'.  */
+
+  if (word_regex_string && !*word_regex_string)
+    word_regex_string = NULL;
+
+  /* If the user has provided a regexp to describe words, compile it.
+     Else, unless this has already been done through a user provided Break
+     character file, construct a fastmap of characters that may appear in
+     a word.  If GNU extensions enabled, include only letters of the
+     underlying character set.  If disabled, include almost everything,
+     even punctuations; stop only on white space.  */
+
+  if (word_regex_string)
+    word_regex = alloc_and_compile_regex (word_regex_string);
+  else if (!break_file)
+    {
+      if (gnu_extensions)
+        {
+
+          /* Simulate \w+.  */
+
+          for (character = 0; character < CHAR_SET_SIZE; character++)
+            word_fastmap[character] = isalpha (character) ? 1 : 0;
+        }
+      else
+        {
+
+          /* Simulate [^ \t\n]+.  */
+
+          memset (word_fastmap, 1, CHAR_SET_SIZE);
+          word_fastmap[' '] = 0;
+          word_fastmap['\t'] = 0;
+          word_fastmap['\n'] = 0;
+        }
+    }
+}
+
+/*------------------------------------------------------------------------.
+| This routine will attempt to swallow a whole file name FILE_NAME into a |
+| contiguous region of memory and return a description of it into BLOCK.  |
+| Standard input is assumed whenever FILE_NAME is NULL, empty or "-".	  |
+| 									  |
+| Previously, in some cases, white space compression was attempted while  |
+| inputting text.  This was defeating some regexps like default end of	  |
+| sentence, which checks for two consecutive spaces.  If white space	  |
+| compression is ever reinstated, it should be in output routines.	  |
+`------------------------------------------------------------------------*/
+
+/* Load the whole contents of FILE_NAME into memory obtained from `xmalloc'
+   and describe it through BLOCK: `start' points to the first byte read and
+   `end' just after the last one.  The memory is not released here; a
+   caller done with it gives `block->start' to `free'.  Standard input is
+   read when FILE_NAME is NULL, empty or "-", and is then left open.  Any
+   open, stat or read failure is fatal and reported through `error'.
+
+   The file name is always given to `error' as the argument of a "%s"
+   format, never as the format itself: a `%' in a name must not be
+   interpreted, and FILE_NAME may well be NULL.  */
+
+static void
+swallow_file_in_memory (const char *file_name, BLOCK *block)
+{
+  int file_handle;              /* file descriptor number */
+  struct stat stat_block;       /* stat block for file */
+  size_t allocated_length;      /* allocated length of memory buffer */
+  size_t used_length;           /* used length in memory buffer */
+  int read_length;              /* number of character gotten on last read */
+  int from_stdin;               /* nonzero when reading standard input */
+  const char *shown_name;       /* file name to use in diagnostics */
+
+  /* As special cases, a file name which is NULL, empty or "-" indicates
+     standard input, which is already opened.  In all other cases, open the
+     file from its name.  */
+
+  from_stdin = !file_name || !*file_name || strcmp (file_name, "-") == 0;
+  shown_name = from_stdin ? "-" : file_name;
+
+  if (from_stdin)
+    file_handle = fileno (stdin);
+  else if ((file_handle = open (file_name, O_RDONLY)) < 0)
+    error (EXIT_FAILURE, errno, "%s", shown_name);
+
+  /* If the file is a plain, regular file, allocate the memory buffer all at
+     once and swallow the file in one blow.  In other cases, read the file
+     repeatedly in smaller chunks until we have it all, reallocating memory
+     once in a while, as we go.  */
+
+  if (fstat (file_handle, &stat_block) < 0)
+    error (EXIT_FAILURE, errno, "%s", shown_name);
+
+#if !MSDOS
+
+  /* On MSDOS, we cannot predict in memory size from file size, because of
+     end of line conversions.  */
+
+  if (S_ISREG (stat_block.st_mode))
+    {
+      allocated_length = (size_t) stat_block.st_size;
+      block->start = (char *) xmalloc (allocated_length);
+      used_length = 0;
+      read_length = 0;
+
+      /* A single `read' is allowed to return less than what was asked for,
+         so keep reading until the expected size has been gotten.  Reaching
+         end of file early (the file shrank, or standard input was not
+         positioned at its beginning) is not an error: keep what was
+         read.  */
+
+      while (used_length < allocated_length
+             && (read_length = read (file_handle,
+                                     block->start + used_length,
+                                     allocated_length - used_length)) > 0)
+        used_length += read_length;
+
+      if (read_length < 0)
+        error (EXIT_FAILURE, errno, "%s", shown_name);
+
+      block->end = block->start + used_length;
+    }
+  else
+
+#endif /* not MSDOS */
+
+    {
+      allocated_length = (size_t) 1 << SWALLOW_REALLOC_LOG;
+      block->start = (char *) xmalloc (allocated_length);
+      used_length = 0;
+
+      while ((read_length = read (file_handle,
+                                  block->start + used_length,
+                                  allocated_length - used_length)) > 0)
+        {
+          used_length += read_length;
+
+          /* Buffer full: grow it by one more step before reading on.  */
+
+          if (used_length == allocated_length)
+            {
+              allocated_length += (size_t) 1 << SWALLOW_REALLOC_LOG;
+              block->start
+                = (char *) xrealloc (block->start, allocated_length);
+            }
+        }
+
+      if (read_length < 0)
+        error (EXIT_FAILURE, errno, "%s", shown_name);
+
+      block->end = block->start + used_length;
+    }
+
+  /* Close the file, but only if it was not the standard input.  */
+
+  if (file_handle != fileno (stdin))
+    close (file_handle);
+}
+
+/* Sort and search routines.  */
+
+/*--------------------------------------------------------------------------.
+| Compare two words, FIRST and SECOND, and return 0 if they are identical.  |
+| Return less than 0 if the first word goes before the second; return	    |
+| greater than 0 if the first word goes after the second.		    |
+| 									    |
+| If a word is indeed a prefix of the other, the shorter should go first.   |
+`--------------------------------------------------------------------------*/
+
+/* `qsort' style comparison: both arguments point to a WORD.  Bytes are
+   compared as unsigned values, after going through `folded_chars' when
+   `ignore_case' is set.  When one word is a prefix of the other, the sign
+   of the size difference decides.  */
+
+static int
+compare_words (const void *void_first, const void *void_second)
+{
+  const WORD *left = (const WORD *) void_first;
+  const WORD *right = (const WORD *) void_second;
+  int common;                   /* length both words have in common */
+  int position;                 /* position being compared */
+
+  common = left->size;
+  if (right->size <= common)
+    common = right->size;
+
+  for (position = 0; position < common; position++)
+    {
+      unsigned char left_char = (unsigned char) left->start[position];
+      unsigned char right_char = (unsigned char) right->start[position];
+
+      if (ignore_case)
+        {
+          left_char = folded_chars[left_char];
+          right_char = folded_chars[right_char];
+        }
+
+      /* Both operands get promoted to int, so the difference cannot
+         wrap around.  */
+
+      if (left_char != right_char)
+        return left_char - right_char;
+    }
+
+  return left->size - right->size;
+}
+
+/*-----------------------------------------------------------------------.
+| Decides which of two OCCURS, FIRST or SECOND, should lexicographically |
+| go first.  In case of a tie, preserve the original order through a	 |
+| pointer comparison.							 |
+`-----------------------------------------------------------------------*/
+
+/* `qsort' style comparison: both arguments point to an OCCURS.  Keywords
+   are compared first, through `compare_words'.  Equal keywords are then
+   ordered by the address of the keyword in the source text, which keeps
+   the sort stable.  */
+
+static int
+compare_occurs (const void *void_first, const void *void_second)
+{
+#define first ((const OCCURS *) void_first)
+#define second ((const OCCURS *) void_second)
+  int value;
+
+  value = compare_words (&first->key, &second->key);
+  if (value != 0)
+    return value;
+
+  /* Compare the addresses instead of returning their difference: the
+     latter is a `ptrdiff_t', which may be wider than `int', and truncating
+     it could change its sign or turn it into zero.  */
+
+  return ((first->key.start > second->key.start)
+          - (first->key.start < second->key.start));
+#undef first
+#undef second
+}
+
+/*------------------------------------------------------------.
+| Return !0 if WORD appears in TABLE.  Uses a binary search.  |
+`------------------------------------------------------------*/
+
+/* WORD is looked for in TABLE, whose entries have to be sorted according
+   to `compare_words'.  Return 1 if found, 0 otherwise; an empty TABLE
+   never matches.  Indices are `size_t', like the table length, and the
+   candidates are kept as the half-open range [lowest, highest), so neither
+   the bounds nor the middle computation may overflow or go negative.  */
+
+static int
+search_table (WORD *word, WORD_TABLE *table)
+{
+  size_t lowest;                /* lowest index still possible */
+  size_t highest;               /* one more than highest index possible */
+  size_t middle;                /* current middle index */
+  int value;                    /* value from last comparison */
+
+  lowest = 0;
+  highest = table->length;
+  while (lowest < highest)
+    {
+      middle = lowest + (highest - lowest) / 2;
+      value = compare_words (word, table->start + middle);
+      if (value < 0)
+        highest = middle;
+      else if (value > 0)
+        lowest = middle + 1;
+      else
+        return 1;
+    }
+  return 0;
+}
+
+/*---------------------------------------------------------------------.
+| Sort the whole occurs table in memory.  Presumably, `qsort' does not |
+| take intermediate copies of table elements, so the sort will be      |
+| stabilized throughout the comparison routine.			       |
+`---------------------------------------------------------------------*/
+
+static void
+sort_found_occurs (void)
+{
+  /* Language 0 is the only one for the time being.  Entries are ordered
+     by `compare_occurs'.  */
+
+  OCCURS *entries = occurs_table[0];
+  size_t entry_count = number_of_occurs[0];
+
+  qsort (entries, entry_count, sizeof *entries, compare_occurs);
+}
+
+/* Parameter files reading routines.  */
+
+/*----------------------------------------------------------------------.
+| Read a file named FILE_NAME, containing a set of break characters.    |
+| Build a content to the array word_fastmap in which all characters are |
+| allowed except those found in the file.  Characters may be repeated.  |
+`----------------------------------------------------------------------*/
+
+static void
+digest_break_file (const char *file_name)
+{
+  BLOCK contents;               /* in memory copy of the Break file */
+  const char *scan;             /* character being looked at */
+
+  swallow_file_in_memory (file_name, &contents);
+
+  /* Allow every character in words to begin with, then disallow each one
+     met in the file.  */
+
+  memset (word_fastmap, 1, CHAR_SET_SIZE);
+
+  scan = contents.start;
+  while (scan < contents.end)
+    {
+      word_fastmap[(unsigned char) *scan] = 0;
+      scan++;
+    }
+
+  /* Without GNU extensions, space, tab and newline always break words,
+     whether the file lists them or not.  With GNU extensions, they break
+     words only if listed; so the only way to keep newline within words is
+     a Break file having no newline at all, not even a final one.  */
+
+  if (!gnu_extensions)
+    {
+      word_fastmap[' '] = 0;
+      word_fastmap['\t'] = 0;
+      word_fastmap['\n'] = 0;
+    }
+
+  /* The copy of the file is not needed anymore.  */
+
+  free (contents.start);
+}
+
+/*-----------------------------------------------------------------------.
+| Read a file named FILE_NAME, containing one word per line, then	 |
+| construct in TABLE a table of WORD descriptors for them.  The routine	 |
+| swallows the whole file in memory; this is at the expense of space	 |
+| needed for newlines, which are useless; however, the reading is fast.	 |
+`-----------------------------------------------------------------------*/
+
+/* The WORD descriptors made here point right into the in memory copy of
+   the file, which is therefore not released.  Empty lines produce no
+   entry.  On return, TABLE is sorted according to `compare_words'.  */
+
+static void
+digest_word_file (const char *file_name, WORD_TABLE *table)
+{
+  BLOCK contents;               /* in memory copy of the file */
+  char *line;                   /* start of the line being read */
+  char *line_end;               /* its newline, or end of the file */
+
+  swallow_file_in_memory (file_name, &contents);
+
+  table->start = NULL;
+  table->length = 0;
+
+  /* Go through the file, one line at a time.  */
+
+  line = contents.start;
+  while (line < contents.end)
+    {
+
+      /* Find where the line stops.  */
+
+      for (line_end = line;
+           line_end < contents.end && *line_end != '\n';
+           line_end++)
+        continue;
+
+      /* The line is the word; record it, unless it is empty.  */
+
+      if (line_end > line)
+        {
+          ALLOC_NEW_WORD (table);
+          table->start[table->length].start = line;
+          table->start[table->length].size = line_end - line;
+          table->length++;
+        }
+
+      /* Step over the newline.  There is none after an incomplete last
+         line, which ends with the file.  */
+
+      line = line_end < contents.end ? line_end + 1 : line_end;
+    }
+
+  /* Finally, sort all the words read.  */
+
+  qsort (table->start, table->length, (size_t) sizeof (WORD), compare_words);
+}
+
+/* Keyword recognition and selection.  */
+
+/*----------------------------------------------------------------------.
+| For each keyword in the source text, constructs an OCCURS structure.  |
+`----------------------------------------------------------------------*/
+
+static void
+find_occurs_in_text (void)
+{
+  char *cursor;			/* for scanning the source text */
+  char *scan;			/* for scanning the source text also */
+  char *line_start;		/* start of the current input line */
+  char *line_scan;		/* newlines scanned until this point */
+  int reference_length;		/* length of reference in input mode */
+  WORD possible_key;		/* possible key, to ease searches */
+  OCCURS *occurs_cursor;	/* current OCCURS under construction */
+
+  char *context_start;		/* start of left context */
+  char *context_end;		/* end of right context */
+  char *word_start;		/* start of word */
+  char *word_end;		/* end of word */
+  char *next_context_start;	/* next start of left context */
+
+  /* reference_length is always used within `if (input_reference)'.
+     However, GNU C diagnoses that it may be used uninitialized.  The
+     following assignment is merely to shut it up.  */
+
+  reference_length = 0;
+
+  /* Tracking where lines start is helpful for reference processing.  In
+     auto reference mode, this allows counting lines.  In input reference
+     mode, this permits finding the beginning of the references.
+
+     The first line begins with the file, skip immediately this very first
+     reference in input reference mode, to help further rejection any word
+     found inside it.  Also, unconditionally assigning these variable has
+     the happy effect of shutting up lint.  */
+
+  line_start = text_buffer.start;
+  line_scan = line_start;
+  if (input_reference)
+    {
+      SKIP_NON_WHITE (line_scan, text_buffer.end);
+      reference_length = line_scan - line_start;
+      SKIP_WHITE (line_scan, text_buffer.end);
+    }
+
+  /* Process the whole buffer, one line or one sentence at a time.  */
+
+  for (cursor = text_buffer.start;
+       cursor < text_buffer.end;
+       cursor = next_context_start)
+    {
+
+      /* `context_start' gets initialized before the processing of each
+	 line, or once for the whole buffer if no end of line or sentence
+	 sequence separator.  */
+
+      context_start = cursor;
+
+      /* If a end of line or end of sentence sequence is defined and
+	 non-empty, `next_context_start' will be recomputed to be the end of
+	 each line or sentence, before each one is processed.  If no such
+	 sequence, then `next_context_start' is set at the end of the whole
+	 buffer, which is then considered to be a single line or sentence.
+	 This test also accounts for the case of an incomplete line or
+	 sentence at the end of the buffer.  */
+
+      if (context_regex_string
+	  && (re_search (context_regex, cursor, text_buffer.end - cursor,
+			 0, text_buffer.end - cursor, &context_regs)
+	      >= 0))
+	next_context_start = cursor + context_regs.end[0];
+
+      else
+	next_context_start = text_buffer.end;
+
+      /* Include the separator into the right context, but not any suffix
+	 white space in this separator; this insures it will be seen in
+	 output and will not take more space than necessary.  */
+
+      context_end = next_context_start;
+      SKIP_WHITE_BACKWARDS (context_end, context_start);
+
+      /* Read and process a single input line or sentence, one word at a
+	 time.  */
+
+      while (1)
+	{
+	  if (word_regex)
+
+	    /* If a word regexp has been compiled, use it to skip at the
+	       beginning of the next word.  If there is no such word, exit
+	       the loop.  */
+
+	    {
+	      if (re_search (word_regex, cursor, context_end - cursor,
+			     0, context_end - cursor, &word_regs)
+		  < 0)
+		break;
+	      word_start = cursor + word_regs.start[0];
+	      word_end = cursor + word_regs.end[0];
+	    }
+	  else
+
+	    /* Avoid re_search and use the fastmap to skip to the
+	       beginning of the next word.  If there is no more word in
+	       the buffer, exit the loop.  */
+
+	    {
+	      scan = cursor;
+	      while (scan < context_end
+		     && !word_fastmap[(unsigned char) *scan])
+		scan++;
+
+	      if (scan == context_end)
+		break;
+
+	      word_start = scan;
+
+	      while (scan < context_end
+		     && word_fastmap[(unsigned char) *scan])
+		scan++;
+
+	      word_end = scan;
+	    }
+
+	  /* Skip right to the beginning of the found word.  */
+
+	  cursor = word_start;
+
+	  /* Skip any zero length word.  Just advance a single position,
+	     then go fetch the next word.  */
+
+	  if (word_end == word_start)
+	    {
+	      cursor++;
+	      continue;
+	    }
+
+	  /* This is a genuine, non empty word, so save it as a possible
+	     key.  Then skip over it.  Also, maintain the maximum length of
+	     all words read so far.  It is mandatory to take the maximum
+	     length of all words in the file, without considering if they
+	     are actually kept or rejected, because backward jumps at output
+	     generation time may fall in *any* word.  */
+
+	  possible_key.start = cursor;
+	  possible_key.size = word_end - word_start;
+	  cursor += possible_key.size;
+
+	  if (possible_key.size > maximum_word_length)
+	    maximum_word_length = possible_key.size;
+
+	  /* In input reference mode, update `line_start' from its previous
+	     value.  Count the lines just in case auto reference mode is
+	     also selected. If it happens that the word just matched is
+	     indeed part of a reference; just ignore it.  */
+
+	  if (input_reference)
+	    {
+	      while (line_scan < possible_key.start)
+		if (*line_scan == '\n')
+		  {
+		    total_line_count++;
+		    line_scan++;
+		    line_start = line_scan;
+		    SKIP_NON_WHITE (line_scan, text_buffer.end);
+		    reference_length = line_scan - line_start;
+		  }
+		else
+		  line_scan++;
+	      if (line_scan > possible_key.start)
+		continue;
+	    }
+
+	  /* Ignore the word if an `Ignore words' table exists and if it is
+	     part of it.  Also ignore the word if an `Only words' table and
+	     if it is *not* part of it.
+
+	     It is allowed that both tables be used at once, even if this
+	     may look strange for now.  Just ignore a word that would appear
+	     in both.  If regexps are eventually implemented for these
+	     tables, the Ignore table could then reject words that would
+	     have been previously accepted by the Only table.  */
+
+	  if (ignore_file && search_table (&possible_key, &ignore_table))
+	    continue;
+	  if (only_file && !search_table (&possible_key, &only_table))
+	    continue;
+
+	  /* A non-empty word has been found.  First of all, insure
+	     proper allocation of the next OCCURS, and make a pointer to
+	     where it will be constructed.  */
+
+	  ALLOC_NEW_OCCURS (0);
+	  occurs_cursor = occurs_table[0] + number_of_occurs[0];
+
+	  /* Define the reference field, if any.  */
+
+	  if (auto_reference)
+	    {
+
+	      /* While auto referencing, update `line_start' from its
+		 previous value, counting lines as we go.  If input
+		 referencing at the same time, `line_start' has been
+		 advanced earlier, and the following loop is never really
+		 executed.  */
+
+	      while (line_scan < possible_key.start)
+		if (*line_scan == '\n')
+		  {
+		    total_line_count++;
+		    line_scan++;
+		    line_start = line_scan;
+		    SKIP_NON_WHITE (line_scan, text_buffer.end);
+		  }
+		else
+		  line_scan++;
+
+	      occurs_cursor->reference = total_line_count;
+	    }
+	  else if (input_reference)
+	    {
+
+	      /* If only input referencing, `line_start' has been computed
+		 earlier to detect the case the word matched would be part
+		 of the reference.  The reference position is simply the
+		 value of `line_start'.  */
+
+	      occurs_cursor->reference
+		= (DELTA) (line_start - possible_key.start);
+	      if (reference_length > reference_max_width)
+		reference_max_width = reference_length;
+	    }
+
+	  /* Exclude the reference from the context in simple cases.  */
+
+	  if (input_reference && line_start == context_start)
+	    {
+	      SKIP_NON_WHITE (context_start, context_end);
+	      SKIP_WHITE (context_start, context_end);
+	    }
+
+	  /* Completes the OCCURS structure.  */
+
+	  occurs_cursor->key = possible_key;
+	  occurs_cursor->left = context_start - possible_key.start;
+	  occurs_cursor->right = context_end - possible_key.start;
+
+	  number_of_occurs[0]++;
+	}
+    }
+}
+
+/* Formatting and actual output - service routines.  */
+
+/*-------------------------------------------------------.
+| Emit NUMBER blanks on stdout.  Nothing is written when |
+| NUMBER is zero or negative.                            |
+`-------------------------------------------------------*/
+
+static void
+print_spaces (int number)
+{
+  int remaining = number;	/* blanks still to be written */
+
+  while (remaining > 0)
+    {
+      putchar (' ');
+      remaining--;
+    }
+}
+
+/*---------------------------------------------------------.
+| Print BASE with the TeX accent whose control symbol is a |
+| backslash followed by ACCENT.                            |
+`---------------------------------------------------------*/
+
+static void
+print_tex_accent (int accent, int base)
+{
+  /* A lower case `i' must lose its dot under an accent, so the dotless
+     `\i' is used.  It has to be wrapped in braces: a bare `\i' is a
+     control word, which would absorb any following letters into its
+     name (`\"\ive' reads as the undefined `\ive') and would swallow a
+     following space.  */
+
+  if (base == 'i')
+    printf ("\\%c{\\i}", accent);
+  else
+    printf ("\\%c%c", accent, base);
+}
+
+/*---------------------------------------------------------.
+| Print FIELD on stdout.  Bytes flagged in edited_flag are |
+| rewritten for the output format; all others are copied   |
+| unchanged.                                               |
+`---------------------------------------------------------*/
+
+static void
+print_field (BLOCK field)
+{
+  char *cursor;			/* Cursor in field to print */
+  int character;		/* Current character */
+  int base;			/* Base character, without diacritic */
+  int diacritic;		/* Diacritic code for the character */
+
+  /* Whitespace is not really compressed.  Instead, each flagged white
+     space character is printed as one single space.  */
+
+  for (cursor = field.start; cursor < field.end; cursor++)
+    {
+      character = (unsigned char) *cursor;
+
+      /* Characters which are not flagged go out untouched.  */
+
+      if (!edited_flag[character])
+	{
+	  putchar (*cursor);
+	  continue;
+	}
+
+      /* First check if this is a diacriticized character.
+
+	 This works only for TeX.  I do not know how diacriticized
+	 letters work with `roff'.  Please someone explain it to me!  */
+
+      diacritic = todiac (character);
+      if (diacritic != 0 && output_format == TEX_FORMAT)
+	{
+	  base = tobase (character);
+	  switch (diacritic)
+	    {
+
+	    case 1:		/* Latin diphthongs */
+	      switch (base)
+		{
+		case 'o':
+		  fputs ("\\oe{}", stdout);
+		  break;
+
+		case 'O':
+		  fputs ("\\OE{}", stdout);
+		  break;
+
+		case 'a':
+		  fputs ("\\ae{}", stdout);
+		  break;
+
+		case 'A':
+		  fputs ("\\AE{}", stdout);
+		  break;
+
+		default:
+		  putchar (' ');
+		}
+	      break;
+
+	    case 2:		/* Acute accent */
+	      print_tex_accent ('\'', base);
+	      break;
+
+	    case 3:		/* Grave accent */
+	      print_tex_accent ('`', base);
+	      break;
+
+	    case 4:		/* Circumflex accent */
+	      print_tex_accent ('^', base);
+	      break;
+
+	    case 5:		/* Diaeresis */
+	      print_tex_accent ('"', base);
+	      break;
+
+	    case 6:		/* Tilde accent */
+	      print_tex_accent ('~', base);
+	      break;
+
+	    case 7:		/* Cedilla */
+	      printf ("\\c{%c}", base);
+	      break;
+
+	    case 8:		/* Small circle beneath */
+	      switch (base)
+		{
+		case 'a':
+		  fputs ("\\aa{}", stdout);
+		  break;
+
+		case 'A':
+		  fputs ("\\AA{}", stdout);
+		  break;
+
+		default:
+		  putchar (' ');
+		}
+	      break;
+
+	    case 9:		/* Strike through */
+	      switch (base)
+		{
+		case 'o':
+		  fputs ("\\o{}", stdout);
+		  break;
+
+		case 'O':
+		  fputs ("\\O{}", stdout);
+		  break;
+
+		default:
+		  putchar (' ');
+		}
+	      break;
+
+	    default:
+	      /* Any other diacritic code prints nothing at all.  */
+	      break;
+	    }
+	  continue;
+	}
+
+      /* This is not a diacritic character, so handle cases which are
+	 really specific to `roff' or TeX.  No output format is tested
+	 here: a character only gets this far when its edited_flag entry
+	 is set.  All white space processing is done as the default case
+	 of this switch.  */
+
+      switch (character)
+	{
+	case '"':
+	  /* In roff output format, double any quote.  */
+	  putchar ('"');
+	  putchar ('"');
+	  break;
+
+	case '$':
+	case '%':
+	case '&':
+	case '#':
+	case '_':
+	  /* In TeX output format, precede these with a backslash.  */
+	  putchar ('\\');
+	  putchar (character);
+	  break;
+
+	case '{':
+	case '}':
+	  /* In TeX output format, precede these with a backslash and
+	     force mathematical mode.  */
+	  printf ("$\\%c$", character);
+	  break;
+
+	case '\\':
+	  /* In TeX output mode, request production of a backslash.  */
+	  fputs ("\\backslash{}", stdout);
+	  break;
+
+	default:
+	  /* Any other flagged character produces a single space.  */
+	  putchar (' ');
+	}
+    }
+}
+
+/* Formatting and actual output - planning routines.  */
+
+/*--------------------------------------------------------------.
+| Derive the output layout from the options and from what was   |
+| learned while reading the input: reference width, field width |
+| limits and the table of characters needing edited output.     |
+`--------------------------------------------------------------*/
+
+static void
+fix_output_parameters (void)
+{
+  int input;			/* index into the input file arrays */
+  int last_line;		/* highest line ordinal within one input */
+  char digits[12];		/* LAST_LINE spelled out in decimal */
+  int width;			/* room needed by one input's references */
+  int code;			/* character code being flagged */
+  const char *special;		/* walks a literal list of characters */
+
+  /* Auto references read FILE:LINE.  Size the field for the longest one
+     any input can produce, the extra column being the separator between
+     file name and line number, and allocate the buffer in which each
+     reference will be built.  */
+
+  if (auto_reference)
+    {
+      reference_max_width = 0;
+      for (input = 0; input < number_input_files; input++)
+	{
+	  /* Line counts accumulate from one input to the next, so take
+	     away what the previous input had reached.  */
+
+	  last_line = file_line_count[input] + 1;
+	  if (input > 0)
+	    last_line -= file_line_count[input - 1];
+
+	  sprintf (digits, "%d", last_line);
+	  width = strlen (digits);
+	  if (input_file_name[input])
+	    width += strlen (input_file_name[input]);
+
+	  if (width > reference_max_width)
+	    reference_max_width = width;
+	}
+      reference_max_width++;
+      reference.start = (char *) xmalloc ((size_t) reference_max_width + 1);
+    }
+
+  /* A reference printed at the left takes its width, plus one gap, out
+     of the line right away.  */
+
+  if (!right_reference && (auto_reference || input_reference))
+    line_width -= reference_max_width + gap_size;
+
+  /* The line is shared in two halves.  The left half holds the left
+     context followed by a gap; the right half holds the keyword and the
+     right context, with no gap in between.  The tail and head wrap
+     around fields borrow whatever these leave unused; their own limits
+     are worked out line by line elsewhere.  */
+
+  half_line_width = line_width / 2;
+  keyafter_max_width = half_line_width;
+  before_max_width = half_line_width - gap_size;
+
+  /* An empty truncation string is replaced by NULL, so that later tests
+     need only look at the pointer.  Its length is recorded only when
+     there is a real string; otherwise truncation_string_length keeps the
+     value it already had.  */
+
+  if (!truncation_string || !*truncation_string)
+    truncation_string = NULL;
+  else
+    truncation_string_length = strlen (truncation_string);
+
+  if (!gnu_extensions)
+    {
+
+      /* The field widths of UNIX' ptx were never fully understood.  This
+	 formula comes close without imitating it completely.  */
+
+      keyafter_max_width -= 2 * truncation_string_length + 1;
+    }
+  else
+    {
+
+      /* At most two truncation marks appear on a line: one at the left
+	 of the keyword (starting `before', or `head' when there is one)
+	 and one at its right (ending `keyafter', or `tail' when there is
+	 one).  Both may fall on the same side of the center of the line,
+	 so each half gives up room for two marks.  */
+
+      before_max_width -= 2 * truncation_string_length;
+      keyafter_max_width -= 2 * truncation_string_length;
+    }
+
+  /* Flag the characters needing special output processing, starting
+     with white space.  Form feed is always counted as white space, even
+     on systems where isspace disagrees.  */
+
+  for (code = 0; code < CHAR_SET_SIZE; code++)
+    edited_flag[code] = isspace (code) ? 1 : 0;
+  edited_flag['\f'] = 1;
+
+  /* Add the flags which depend on the output format.  Dumb output, and
+     an unknown format which should never happen, add none.  */
+
+  if (output_format == ROFF_FORMAT)
+    {
+
+      /* `Quote' characters should be doubled.  */
+
+      edited_flag['"'] = 1;
+    }
+  else if (output_format == TEX_FORMAT)
+    {
+
+      /* These characters are special to TeX.  */
+
+      for (special = "$%&#_{}\\"; *special; special++)
+	edited_flag[(unsigned char) *special] = 1;
+
+      /* From 0200 up, the flag is computed anew: it is set exactly for
+	 the characters that todiac reports as diacriticized, whatever
+	 isspace said about them.  */
+
+      for (code = 0200; code < CHAR_SET_SIZE; code++)
+	edited_flag[code] = todiac (code) != 0;
+    }
+}
+
+/*------------------------------------------------------------------.
+| Compute the position and length of all the output fields, given a |
+| pointer to some OCCURS.  The results are left in the `keyafter',  |
+| `before', `tail' and `head' fields and their truncation flags,    |
+| and in `reference' when a reference mode is selected.             |
+`------------------------------------------------------------------*/
+
+static void
+define_all_fields (OCCURS *occurs)
+{
+  int tail_max_width;		/* allowable width of tail field */
+  int head_max_width;		/* allowable width of head field */
+  char *cursor;			/* running cursor in source text */
+  char *left_context_start;	/* start of left context */
+  char *right_context_end;	/* end of right context */
+  char *left_field_start;	/* conservative start for `head'/`before' */
+  int file_index;		/* index in text input file arrays */
+  const char *file_name;	/* file name for reference */
+  int line_ordinal;		/* line ordinal for reference */
+
+  /* Define `keyafter', start of left context and end of right context.
+     `keyafter' starts at the saved position for keyword and extends to the
+     right from the end of the keyword, eating separators or full words, but
+     not beyond maximum allowed width for `keyafter' field or limit for the
+     right context.  Suffix spaces will be removed afterwards.  */
+
+  keyafter.start = occurs->key.start;
+  keyafter.end = keyafter.start + occurs->key.size;
+  left_context_start = keyafter.start + occurs->left;
+  right_context_end = keyafter.start + occurs->right;
+
+  cursor = keyafter.end;
+  while (cursor < right_context_end
+	 && cursor <= keyafter.start + keyafter_max_width)
+    {
+      keyafter.end = cursor;
+      SKIP_SOMETHING (cursor, right_context_end);
+    }
+  if (cursor <= keyafter.start + keyafter_max_width)
+    keyafter.end = cursor;
+
+  keyafter_truncation = truncation_string && keyafter.end < right_context_end;
+
+  SKIP_WHITE_BACKWARDS (keyafter.end, keyafter.start);
+
+  /* When the left context is wide, it might take some time to catch up from
+     the left context boundary to the beginning of the `head' or `before'
+     fields.  So, in this case, to speed the catchup, we jump back from the
+     keyword, using some secure distance, possibly falling in the middle of
+     a word.  A secure backward jump would be at least half the maximum
+     width of a line, plus the size of the longest word met in the whole
+     input.  We conclude this backward jump by a skip forward of at least
+     one word.  In this manner, we should not inadvertently accept only part
+     of a word.  From the reached point, when it will be time to fix the
+     beginning of `head' or `before' fields, we will skip forward words or
+     delimiters until we get sufficiently near.  */
+
+  if (-occurs->left > half_line_width + maximum_word_length)
+    {
+      left_field_start
+	= keyafter.start - (half_line_width + maximum_word_length);
+      SKIP_SOMETHING (left_field_start, keyafter.start);
+    }
+  else
+    left_field_start = keyafter.start + occurs->left;
+
+  /* `before' certainly ends at the keyword, but not including separating
+     spaces.  It starts no earlier than the saved value for the left
+     context, by advancing it until it falls inside the maximum allowed
+     width for the before field.  There will be no prefix spaces either.
+     `before' only advances by skipping single separators or whole words. */
+
+  before.start = left_field_start;
+  before.end = keyafter.start;
+  SKIP_WHITE_BACKWARDS (before.end, before.start);
+
+  while (before.start + before_max_width < before.end)
+    SKIP_SOMETHING (before.start, before.end);
+
+  if (truncation_string)
+    {
+      cursor = before.start;
+      SKIP_WHITE_BACKWARDS (cursor, text_buffer.start);
+      before_truncation = cursor > left_context_start;
+    }
+  else
+    before_truncation = 0;
+
+  SKIP_WHITE (before.start, text_buffer.end);
+
+  /* The tail could not take more columns than what has been left in the
+     left context field, and a gap is mandatory.  It starts after the
+     right context, and does not contain prefixed spaces.  It ends at
+     the end of line, the end of buffer or when the tail field is full,
+     whichever comes first.  It cannot contain only part of a word, and
+     has no suffixed spaces.  */
+
+  tail_max_width
+    = before_max_width - (before.end - before.start) - gap_size;
+
+  if (tail_max_width > 0)
+    {
+      tail.start = keyafter.end;
+      SKIP_WHITE (tail.start, text_buffer.end);
+
+      tail.end = tail.start;
+      cursor = tail.end;
+      while (cursor < right_context_end
+	     && cursor < tail.start + tail_max_width)
+	{
+	  tail.end = cursor;
+	  SKIP_SOMETHING (cursor, right_context_end);
+	}
+
+      if (cursor < tail.start + tail_max_width)
+	tail.end = cursor;
+
+      if (tail.end > tail.start)
+	{
+	  keyafter_truncation = 0;
+	  tail_truncation = truncation_string && tail.end < right_context_end;
+	}
+      else
+	tail_truncation = 0;
+
+      SKIP_WHITE_BACKWARDS (tail.end, tail.start);
+    }
+  else
+    {
+
+      /* No place left for a tail field.  */
+
+      tail.start = NULL;
+      tail.end = NULL;
+      tail_truncation = 0;
+    }
+
+  /* `head' could not take more columns than what has been left in the right
+     context field, and a gap is mandatory.  It ends before the left
+     context, and does not contain suffixed spaces.  Its pointer is advanced
+     until the head field has shrunk to its allowed width.  It cannot
+     contain only part of a word, and has no suffixed spaces.  */
+
+  head_max_width
+    = keyafter_max_width - (keyafter.end - keyafter.start) - gap_size;
+
+  if (head_max_width > 0)
+    {
+      head.end = before.start;
+      SKIP_WHITE_BACKWARDS (head.end, text_buffer.start);
+
+      head.start = left_field_start;
+      while (head.start + head_max_width < head.end)
+	SKIP_SOMETHING (head.start, head.end);
+
+      if (head.end > head.start)
+	{
+	  before_truncation = 0;
+	  head_truncation = (truncation_string
+			     && head.start > left_context_start);
+	}
+      else
+	head_truncation = 0;
+
+      SKIP_WHITE (head.start, head.end);
+    }
+  else
+    {
+
+      /* No place left for a head field.  */
+
+      head.start = NULL;
+      head.end = NULL;
+      head_truncation = 0;
+    }
+
+  if (auto_reference)
+    {
+
+      /* Construct the reference text in preallocated space from the file
+	 name and the line number.  Find out in which file the reference
+	 occurred.  Standard input yields an empty file name.  Ensure line
+	 numbers are one based, even if they are computed zero based.
+
+	 NOTE(review): the search loop below puts no upper bound on
+	 file_index; it presumably relies on occurs->reference never
+	 exceeding the last file_line_count entry -- confirm against the
+	 code which fills that array.  */
+
+      file_index = 0;
+      while (file_line_count[file_index] < occurs->reference)
+	file_index++;
+
+      file_name = input_file_name[file_index];
+      if (!file_name)
+	file_name = "";
+
+      line_ordinal = occurs->reference + 1;
+      if (file_index > 0)
+	line_ordinal -= file_line_count[file_index - 1];
+
+      sprintf (reference.start, "%s:%d", file_name, line_ordinal);
+      reference.end = reference.start + strlen (reference.start);
+    }
+  else if (input_reference)
+    {
+
+      /* Reference starts at saved position for reference and extends right
+	 until some white space is met.  */
+
+      reference.start = keyafter.start + (DELTA) occurs->reference;
+      reference.end = reference.start;
+      SKIP_NON_WHITE (reference.end, right_context_end);
+    }
+}
+
+/* Formatting and actual output - control routines.  */
+
+/*-------------------------------------------------------------.
+| Print one quoted roff macro argument up to and including its |
+| closing quote: the truncation mark if MARK_LEFT, then FIELD, |
+| then the mark again if MARK_RIGHT.  The caller has already   |
+| printed the opening quote.                                   |
+`-------------------------------------------------------------*/
+
+static void
+print_roff_argument (BLOCK field, int mark_left, int mark_right)
+{
+  if (mark_left)
+    fputs (truncation_string, stdout);
+  print_field (field);
+  if (mark_right)
+    fputs (truncation_string, stdout);
+  putchar ('"');
+}
+
+/*---------------------------------------------------------------.
+| Write the current fields as a single macro call line meant for |
+| `troff' or `nroff': tail, before, keyafter, head, then the     |
+| reference when one is in use.                                  |
+`---------------------------------------------------------------*/
+
+static void
+output_one_roff_line (void)
+{
+  /* The macro name opens the line, immediately followed by `tail',
+     which is marked at its right end when truncated.  */
+
+  printf (".%s \"", macro_name);
+  print_roff_argument (tail, 0, tail_truncation);
+
+  /* `before' is marked at its left end.  */
+
+  fputs (" \"", stdout);
+  print_roff_argument (before, before_truncation, 0);
+
+  /* `keyafter' is marked at its right end.  */
+
+  fputs (" \"", stdout);
+  print_roff_argument (keyafter, 0, keyafter_truncation);
+
+  /* `head' is marked at its left end.  */
+
+  fputs (" \"", stdout);
+  print_roff_argument (head, head_truncation, 0);
+
+  /* The reference is a fifth argument, present only with references.  */
+
+  if (auto_reference || input_reference)
+    {
+      fputs (" \"", stdout);
+      print_roff_argument (reference, 0, 0);
+    }
+
+  putchar ('\n');
+}
+
+/*-------------------------------------------------------------.
+| Write the current fields as one `TeX' macro call with braced |
+| arguments: tail, before, key, after, head and, if any, the   |
+| reference.                                                   |
+`-------------------------------------------------------------*/
+
+static void
+output_one_tex_line (void)
+{
+  BLOCK key;			/* leading item of `keyafter' */
+  BLOCK after;			/* what remains of `keyafter' */
+  char *split = keyafter.start;	/* where `key' ends and `after' begins */
+
+  /* Cut `keyafter' in two, right after the first item that
+     SKIP_SOMETHING steps over from its start.  */
+
+  SKIP_SOMETHING (split, keyafter.end);
+  key.start = keyafter.start;
+  key.end = split;
+  after.start = split;
+  after.end = keyafter.end;
+
+  /* Now write the macro call, one braced argument per field.  */
+
+  printf ("\\%s ", macro_name);
+  fputs ("{", stdout);
+  print_field (tail);
+  fputs ("}{", stdout);
+  print_field (before);
+  fputs ("}{", stdout);
+  print_field (key);
+  fputs ("}{", stdout);
+  print_field (after);
+  fputs ("}{", stdout);
+  print_field (head);
+  fputs ("}", stdout);
+
+  /* The reference comes as one more argument, when references are on.  */
+
+  if (auto_reference || input_reference)
+    {
+      fputs ("{", stdout);
+      print_field (reference);
+      fputs ("}", stdout);
+    }
+  fputs ("\n", stdout);
+}
+
+/*-------------------------------------------------------------.
+| Write the current fields as one column aligned line of plain |
+| text, as wanted on a dumb terminal.                          |
+`-------------------------------------------------------------*/
+
+static void
+output_one_dumb_line (void)
+{
+  /* True when a reference has to be printed after everything else.  */
+  int reference_at_right
+    = (auto_reference || input_reference) && right_reference;
+
+  if (!right_reference)
+    {
+      /* The reference column opens the line, padded up to its maximum
+	 width plus one gap.  */
+
+      print_field (reference);
+      if (auto_reference)
+	{
+	  /* Add a colon, in such a way that GNU emacs next-error will
+	     handle the reference.  The colon is taken from the gap
+	     which follows.  */
+
+	  putchar (':');
+	  print_spaces (reference_max_width
+			+ gap_size
+			- (reference.end - reference.start)
+			- 1);
+	}
+      else
+	print_spaces (reference_max_width
+		      + gap_size
+		      - (reference.end - reference.start));
+    }
+
+  /* The left half of the line: an optional `tail', then blanks pushing
+     `before' against the gap.  */
+
+  if (tail.start >= tail.end)
+    print_spaces (half_line_width - gap_size
+		  - (before.end - before.start)
+		  - (before_truncation ? truncation_string_length : 0));
+  else
+    {
+      print_field (tail);
+      if (tail_truncation)
+	fputs (truncation_string, stdout);
+
+      print_spaces (half_line_width - gap_size
+		    - (before.end - before.start)
+		    - (before_truncation ? truncation_string_length : 0)
+		    - (tail.end - tail.start)
+		    - (tail_truncation ? truncation_string_length : 0));
+    }
+
+  /* `before', with its truncation mark at the left.  */
+
+  if (before_truncation)
+    fputs (truncation_string, stdout);
+  print_field (before);
+
+  print_spaces (gap_size);
+
+  /* `keyafter', with its truncation mark at the right.  */
+
+  print_field (keyafter);
+  if (keyafter_truncation)
+    fputs (truncation_string, stdout);
+
+  /* The right half of the line is completed by `head', pushed to the
+     right edge.  Without any `head', blanks are needed only when a
+     reference still has to follow.  */
+
+  if (head.start < head.end)
+    {
+      print_spaces (half_line_width
+		    - (keyafter.end - keyafter.start)
+		    - (keyafter_truncation ? truncation_string_length : 0)
+		    - (head.end - head.start)
+		    - (head_truncation ? truncation_string_length : 0));
+      if (head_truncation)
+	fputs (truncation_string, stdout);
+      print_field (head);
+    }
+  else if (reference_at_right)
+    print_spaces (half_line_width
+		  - (keyafter.end - keyafter.start)
+		  - (keyafter_truncation ? truncation_string_length : 0));
+
+  if (reference_at_right)
+    {
+      /* A gap, then the reference, close the line.  */
+
+      print_spaces (gap_size);
+      print_field (reference);
+    }
+
+  fputs ("\n", stdout);
+}
+
+/*--------------------------------------------------------------.
+| Walk the table of keyword occurrences in order; for each one, |
+| compute its fields and write a line in the selected format.   |
+`--------------------------------------------------------------*/
+
+static void
+generate_all_output (void)
+{
+  int entry_number;		/* how many entries were processed so far */
+  OCCURS *entry;		/* the entry being processed */
+
+  /* Give `tail' and `head' empty defaults, for the case line contexts or
+     references are not used and they would never be computed.  */
+
+  tail.start = NULL;
+  tail.end = NULL;
+  tail_truncation = 0;
+
+  head.start = NULL;
+  head.end = NULL;
+  head_truncation = 0;
+
+  for (entry_number = 0, entry = occurs_table[0];
+       entry_number < number_of_occurs[0];
+       entry_number++, entry++)
+    {
+      /* Work out the extent of every field, and which truncation marks
+	 are needed.  */
+
+      define_all_fields (entry);
+
+      /* Write the line.  An unknown format should never happen; it is
+	 treated like the dumb one.  */
+
+      if (output_format == ROFF_FORMAT)
+	output_one_roff_line ();
+      else if (output_format == TEX_FORMAT)
+	output_one_tex_line ();
+      else if (output_format == UNKNOWN_FORMAT
+	       || output_format == DUMB_FORMAT)
+	output_one_dumb_line ();
+    }
+}
+
+/* Option decoding and main program.  */
+
+/*------------------------------------------------------------.
+| Exit with STATUS after printing the full option summary on  |
+| stdout when STATUS is EXIT_SUCCESS, or else a short hint on |
+| stderr.                                                     |
+`------------------------------------------------------------*/
+
+static void
+usage (int status)
+{
+  if (status == EXIT_SUCCESS)
+    {
+      /* Help was asked for: synopsis first, then the option list.  */
+
+      printf (_("\
+Usage: %s [OPTION]... [INPUT]...   (without -G)\n\
+  or:  %s -G [OPTION]... [INPUT [OUTPUT]]\n"),
+	      program_name, program_name);
+      fputs (_("\
+Mandatory arguments to long options are mandatory for short options too.\n\
+\n\
+  -A, --auto-reference           output automatically generated references\n\
+  -C, --copyright                display Copyright and copying conditions\n\
+  -G, --traditional              behave more like System V `ptx'\n\
+  -F, --flag-truncation=STRING   use STRING for flagging line truncations\n\
+  -M, --macro-name=STRING        macro name to use instead of `xx'\n\
+  -O, --format=roff              generate output as roff directives\n\
+  -R, --right-side-refs          put references at right, not counted in -w\n\
+  -S, --sentence-regexp=REGEXP   for end of lines or end of sentences\n\
+  -T, --format=tex               generate output as TeX directives\n\
+  -W, --word-regexp=REGEXP       use REGEXP to match each keyword\n\
+  -b, --break-file=FILE          word break characters in this FILE\n\
+  -f, --ignore-case              fold lower case to upper case for sorting\n\
+  -g, --gap-size=NUMBER          gap size in columns between output fields\n\
+  -i, --ignore-file=FILE         read ignore word list from FILE\n\
+  -o, --only-file=FILE           read only word list from this FILE\n\
+  -r, --references               first field of each line is a reference\n\
+  -t, --typeset-mode               - not implemented -\n\
+  -w, --width=NUMBER             output width in columns, reference excluded\n\
+      --help                     display this help and exit\n\
+      --version                  output version information and exit\n\
+\n\
+With no FILE or if FILE is -, read Standard Input.  `-F /' by default.\n"),
+	     stdout);
+    }
+  else
+    {
+      /* Something went wrong: merely point at --help.  */
+
+      fprintf (stderr, _("Try `%s --help' for more information.\n"),
+	       program_name);
+    }
+
+  exit (status);
+}
+
+/*----------------------------------------------------------------------.
+| Main program.  Decode ARGC arguments passed through the ARGV array of |
+| strings, then launch execution.				        |
+`----------------------------------------------------------------------*/
+
+/* Long options equivalences.  Each entry gives the long name, whether it
+   takes an argument, an optional flag variable and a value.  `help' and
+   `version' have a flag variable: they only set show_help and
+   show_version to 1.  `format' has no one-letter form and uses the value
+   10 instead.  Every other entry carries the letter of the short option
+   it stands for.  The all zero entry ends the table.  */
+static const struct option long_options[] =
+{
+  {"auto-reference", no_argument, NULL, 'A'},
+  {"break-file", required_argument, NULL, 'b'},
+  {"copyright", no_argument, NULL, 'C'},
+  {"flag-truncation", required_argument, NULL, 'F'},
+  {"ignore-case", no_argument, NULL, 'f'},
+  {"gap-size", required_argument, NULL, 'g'},
+  {"help", no_argument, &show_help, 1},
+  {"ignore-file", required_argument, NULL, 'i'},
+  {"macro-name", required_argument, NULL, 'M'},
+  {"only-file", required_argument, NULL, 'o'},
+  {"references", no_argument, NULL, 'r'},
+  {"right-side-refs", no_argument, NULL, 'R'},
+  {"format", required_argument, NULL, 10},
+  {"sentence-regexp", required_argument, NULL, 'S'},
+  {"traditional", no_argument, NULL, 'G'},
+  {"typeset-mode", no_argument, NULL, 't'},
+  {"version", no_argument, &show_version, 1},
+  {"width", required_argument, NULL, 'w'},
+  {"word-regexp", required_argument, NULL, 'W'},
+  {0, 0, 0, 0},
+};
+
+/* Spellings accepted by --format, in the order that the `case 10'
+   handling in main expects: index 0 is roff, index 1 is TeX.  A null
+   pointer ends the list.  */
+static char const *const format_args[] = { "roff", "tex", NULL };
+
+/* Program entry point.  Decode the command line options, build the list
+   of input files from the remaining arguments, digest the optional break,
+   ignore and only files, read and study every input file, then sort the
+   occurrences found and generate the output.  ARGC and ARGV are the usual
+   argument count and argument vector.  The function terminates the
+   program through exit instead of returning.  */
+
+int
+main (int argc, char *const argv[])
+{
+  int optchar;                  /* option code returned by getopt_long */
+  int file_index;               /* index in text input file arrays */
+
+  /* Decode program options.  */
+
+  program_name = argv[0];
+  setlocale (LC_ALL, "");
+
+#if HAVE_SETCHRCLASS
+  setchrclass (NULL);
+#endif
+
+  /* getopt_long signals the end of the options by returning -1.  */
+
+  while (optchar = getopt_long (argc, argv, "ACF:GM:ORS:TW:b:i:fg:o:trw:",
+                                long_options, NULL),
+         optchar != -1)
+    {
+      switch (optchar)
+        {
+        default:
+          /* Unrecognized option.  NOTE(review): usage presumably exits;
+             if it returned, control would fall into case 0 below.  */
+          usage (EXIT_FAILURE);
+
+        case 0:
+          /* A long option which only stores a value through its flag
+             pointer (--help and --version); nothing more to do here.  */
+          break;
+
+        case 'C':
+          fputs (_("\
+This program is free software; you can redistribute it and/or modify\n\
+it under the terms of the GNU General Public License as published by\n\
+the Free Software Foundation; either version 2, or (at your option)\n\
+any later version.\n\
+\n\
+This program is distributed in the hope that it will be useful,\n\
+but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n\
+GNU General Public License for more details.\n\
+\n\
+You should have received a copy of the GNU General Public License\n\
+along with this program; if not, write to the Free Software Foundation,\n\
+Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n"),
+                 stdout);
+
+          exit (EXIT_SUCCESS);
+
+        case 'G':
+          gnu_extensions = 0;
+          break;
+
+        case 'b':
+          break_file = optarg;
+          break;
+
+        case 'f':
+          ignore_case = 1;
+          break;
+
+        case 'g':
+          /* NOTE(review): atoi reports no error, so a non-numeric
+             argument is not diagnosed here.  */
+          gap_size = atoi (optarg);
+          break;
+
+        case 'i':
+          ignore_file = optarg;
+          break;
+
+        case 'o':
+          only_file = optarg;
+          break;
+
+        case 'r':
+          input_reference = 1;
+          break;
+
+        case 't':
+          /* Yet to understand...  */
+          break;
+
+        case 'w':
+          /* NOTE(review): same absence of validation as for -g.  */
+          line_width = atoi (optarg);
+          break;
+
+        case 'A':
+          auto_reference = 1;
+          break;
+
+        case 'F':
+          truncation_string = copy_unescaped_string (optarg);
+          break;
+
+        case 'M':
+          macro_name = optarg;
+          break;
+
+        case 'O':
+          output_format = ROFF_FORMAT;
+          break;
+
+        case 'R':
+          right_reference = 1;
+          break;
+
+        case 'S':
+          context_regex_string = copy_unescaped_string (optarg);
+          break;
+
+        case 'T':
+          output_format = TEX_FORMAT;
+          break;
+
+        case 'W':
+          word_regex_string = copy_unescaped_string (optarg);
+          break;
+
+        case 10:
+          /* --format=KEYWORD: the cases below are positions of the
+             keyword within format_args.  */
+          switch (argmatch (optarg, format_args))
+            {
+            default:
+              usage (EXIT_FAILURE);
+
+            case 0:
+              output_format = ROFF_FORMAT;
+              break;
+
+            case 1:
+              output_format = TEX_FORMAT;
+              break;
+            }
+          break;
+        }
+    }
+
+  /* Process trivial options.  */
+
+  if (show_help)
+    usage (EXIT_SUCCESS);
+
+  if (show_version)
+    {
+      printf ("ptx (%s) %s\n", GNU_PACKAGE, VERSION);
+      exit (EXIT_SUCCESS);
+    }
+
+  /* Change the default Ignore file if one is defined.  */
+
+#ifdef DEFAULT_IGNORE_FILE
+  if (!ignore_file)
+    ignore_file = DEFAULT_IGNORE_FILE;
+#endif
+
+  /* Process remaining arguments.  If GNU extensions are enabled, process
+     all arguments as input parameters.  If disabled, accept at most two
+     arguments, the second of which is an output parameter.  */
+
+  if (optind == argc)
+    {
+
+      /* No more argument simply means: read standard input.  */
+
+      input_file_name = (const char **) xmalloc (sizeof (const char *));
+      file_line_count = (int *) xmalloc (sizeof (int));
+      number_input_files = 1;
+      input_file_name[0] = NULL;
+    }
+  else if (gnu_extensions)
+    {
+      number_input_files = argc - optind;
+      input_file_name
+        = (const char **) xmalloc (number_input_files * sizeof (const char *));
+      file_line_count
+        = (int *) xmalloc (number_input_files * sizeof (int));
+
+      /* Record every remaining argument in its own slot.  An empty name
+         or "-" is recorded as NULL, the same marker used above for
+         standard input.  */
+
+      for (file_index = 0; file_index < number_input_files; file_index++)
+        {
+          if (!*argv[optind] || strcmp (argv[optind], "-") == 0)
+            input_file_name[file_index] = NULL;
+          else
+            input_file_name[file_index] = argv[optind];
+          optind++;
+        }
+    }
+  else
+    {
+
+      /* There is one necessary input file.  */
+
+      number_input_files = 1;
+      input_file_name = (const char **) xmalloc (sizeof (const char *));
+      file_line_count = (int *) xmalloc (sizeof (int));
+      if (!*argv[optind] || strcmp (argv[optind], "-") == 0)
+        input_file_name[0] = NULL;
+      else
+        input_file_name[0] = argv[optind];
+      optind++;
+
+      /* Redirect standard output, only if requested.  freopen attaches
+         the stdout stream itself to the named file, so that later writes
+         to stdout stay valid.  The file name is given to error as an
+         argument and never as the format, since it may contain `%'.  */
+
+      if (optind < argc)
+        {
+          if (freopen (argv[optind], "w", stdout) == NULL)
+            error (EXIT_FAILURE, errno, "%s", argv[optind]);
+          optind++;
+        }
+
+      /* Diagnose any other argument as an error.  */
+
+      if (optind < argc)
+        usage (EXIT_FAILURE);
+    }
+
+  /* If the output format has not been explicitly selected, choose dumb
+     terminal format if GNU extensions are enabled, else `roff' format.  */
+
+  if (output_format == UNKNOWN_FORMAT)
+    output_format = gnu_extensions ? DUMB_FORMAT : ROFF_FORMAT;
+
+  /* Initialize the main tables.  */
+
+  initialize_regex ();
+
+  /* Read `Break character' file, if any.  */
+
+  if (break_file)
+    digest_break_file (break_file);
+
+  /* Read `Ignore words' file and `Only words' files, if any.  If any of
+     these files is empty, reset the name of the file to NULL, to avoid
+     unnecessary calls to search_table. */
+
+  if (ignore_file)
+    {
+      digest_word_file (ignore_file, &ignore_table);
+      if (ignore_table.length == 0)
+        ignore_file = NULL;
+    }
+
+  if (only_file)
+    {
+      digest_word_file (only_file, &only_table);
+      if (only_table.length == 0)
+        only_file = NULL;
+    }
+
+  /* Prepare to study all the input files.  */
+
+  number_of_occurs[0] = 0;
+  total_line_count = 0;
+  maximum_word_length = 0;
+  reference_max_width = 0;
+
+  for (file_index = 0; file_index < number_input_files; file_index++)
+    {
+
+      /* Read the file in core, then study it.  */
+
+      swallow_file_in_memory (input_file_name[file_index], &text_buffer);
+      find_occurs_in_text ();
+
+      /* Maintain for each file how many lines have been read so far when
+         its end is reached.  Incrementing the count first is a simple
+         kludge to handle a possible incomplete line at end of file.  */
+
+      total_line_count++;
+      file_line_count[file_index] = total_line_count;
+    }
+
+  /* Do the output process phase.  */
+
+  sort_found_occurs ();
+  fix_output_parameters ();
+  generate_all_output ();
+
+  /* All done.  */
+
+  exit (EXIT_SUCCESS);
+}
author	Jim Meyering <jim@meyering.net>	1998-08-14 14:09:05 +0000
committer	Jim Meyering <jim@meyering.net>	1998-08-14 14:09:05 +0000
commit	7b0caffd312443449cb1f398ded7104794e1dc69 (patch)
tree	ceb02d7a74d884d8a081af4eb48f37b2e9479be1 /src
parent	ed2a7b4e5325503fdb3b4766b25ae9a42618a9e3 (diff)
download	coreutils-7b0caffd312443449cb1f398ded7104794e1dc69.tar.xz