summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/join.c 282
1 files changed, 168 insertions, 114 deletions
diff --git a/src/join.c b/src/join.c
index dd566b3fb..56819f6c8 100644
--- a/src/join.c
+++ b/src/join.c
@@ -15,7 +15,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- Written by Mike Haertel, mike@gnu.ai.mit.edu. */
+ Written by Mike Haertel, mike@gnu.ai.mit.edu. */
#include <config.h>
@@ -23,11 +23,27 @@
#define _GNU_SOURCE
#include <stdio.h>
+#define NDEBUG
+#include <assert.h>
#include <sys/types.h>
#include <getopt.h>
+
+#if HAVE_LIMITS_H
+# include <limits.h>
+#endif
+
+#ifndef UINT_MAX
+# define UINT_MAX ((unsigned int) ~(unsigned int) 0)
+#endif
+
+#ifndef INT_MAX
+# define INT_MAX ((int) (UINT_MAX >> 1))
+#endif
+
#include "system.h"
#include "version.h"
#include "long-options.h"
+#include "xstrtol.h"
#include "error.h"
#define join system_join
@@ -42,68 +58,68 @@ char *xrealloc ();
#define max(A, B) ((A) > (B) ? (A) : (B))
/* An element of the list describing the format of each
- output line. */
+ output line. */
struct outlist
{
- int file; /* File to take field from (1 or 2). */
- int field; /* Field number to print. */
+ int file; /* File to take field from (1 or 2). */
+ int field; /* Field number to print. */
struct outlist *next;
};
-/* A field of a line. */
+/* A field of a line. */
struct field
{
- const char *beg; /* First character in field. */
- size_t len; /* The length of the field. */
+ const char *beg; /* First character in field. */
+ size_t len; /* The length of the field. */
};
-/* A line read from an input file. Newlines are not stored. */
+/* A line read from an input file. Newlines are not stored. */
struct line
{
- char *beg; /* First character in line. */
- char *lim; /* Character after last character in line. */
- int nfields; /* Number of elements in `fields'. */
- int nfields_allocated; /* Number of elements in `fields'. */
+ char *beg; /* First character in line. */
+ char *lim; /* Character after last character in line. */
+ int nfields; /* Number of elements in `fields'. */
+ int nfields_allocated; /* Number of elements allocated for `fields'. */
struct field *fields;
};
/* One or more consecutive lines read from a file that all have the
- same join field value. */
+ same join field value. */
struct seq
{
- int count; /* Elements used in `lines'. */
- int alloc; /* Elements allocated in `lines'. */
+ int count; /* Elements used in `lines'. */
+ int alloc; /* Elements allocated in `lines'. */
struct line *lines;
};
-/* The name this program was run with. */
+/* The name this program was run with. */
char *program_name;
-/* If nonzero, print unpairable lines in file 1 or 2. */
+/* If nonzero, print unpairable lines in file 1 or 2. */
static int print_unpairables_1, print_unpairables_2;
-/* If nonzero, print pairable lines. */
+/* If nonzero, print pairable lines. */
static int print_pairables;
-/* Empty output field filler. */
+/* Empty output field filler. */
static char *empty_filler;
-/* Field to join on. */
+/* Field to join on. */
static int join_field_1, join_field_2;
-/* List of fields to print. */
-static struct outlist *outlist;
+/* Dummy head of the list of fields to print; the first real entry is `outlist_head.next'. */
+static struct outlist outlist_head;
-/* Last element in `outlist', where a new element can be added. */
-static struct outlist *outlist_end;
+/* Last element of the list headed by `outlist_head', where a new element can be added. */
+static struct outlist *outlist_end = &outlist_head;
/* Tab character separating fields; if this is NUL fields are separated
by any nonempty string of white space, otherwise by exactly one
- tab character. */
+ tab character. */
static char tab;
/* When using getopt_long_only, no long option can start with
- a character that is a short option. */
+ a character that is a short option. */
static struct option const longopts[] =
{
{"j", required_argument, NULL, 'j'},
@@ -171,7 +187,7 @@ ADD_FIELD (struct line *line, const char *field, size_t len)
++(line->nfields);
}
-/* Fill in the `fields' structure in LINE. */
+/* Fill in the `fields' structure in LINE. */
static void
xfields (struct line *line)
@@ -217,7 +233,7 @@ xfields (struct line *line)
}
/* Read a line from FP into LINE and split it into fields.
- Return 0 if EOF, 1 otherwise. */
+ Return 0 if EOF, 1 otherwise. */
static int
get_line (FILE *fp, struct line *line)
@@ -272,7 +288,7 @@ initseq (struct seq *seq)
seq->lines = (struct line *) xmalloc (seq->alloc * sizeof (struct line));
}
-/* Read a line from FP and add it to SEQ. Return 0 if EOF, 1 otherwise. */
+/* Read a line from FP and add it to SEQ. Return 0 if EOF, 1 otherwise. */
static int
getseq (FILE *fp, struct seq *seq)
@@ -303,13 +319,13 @@ delseq (struct seq *seq)
}
/* Return <0 if the join field in LINE1 compares less than the one in LINE2;
- >0 if it compares greater; 0 if it compares equal. */
+ >0 if it compares greater; 0 if it compares equal. */
static int
keycmp (struct line *line1, struct line *line2)
{
- const char *beg1, *beg2; /* Start of field to compare in each file. */
- int len1, len2; /* Length of fields to compare. */
+ const char *beg1, *beg2; /* Start of field to compare in each file. */
+ int len1, len2; /* Length of fields to compare. */
int diff;
if (join_field_1 < line1->nfields)
@@ -345,7 +361,7 @@ keycmp (struct line *line1, struct line *line2)
}
/* Print field N of LINE if it exists and is nonempty, otherwise
- `empty_filler' if it is nonempty. */
+ `empty_filler' if it is nonempty. */
static void
prfield (int n, struct line *line)
@@ -364,14 +380,17 @@ prfield (int n, struct line *line)
fputs (empty_filler, stdout);
}
-/* Print the join of LINE1 and LINE2. */
+/* Print the join of LINE1 and LINE2. */
static void
prjoin (struct line *line1, struct line *line2)
{
+ const struct outlist *outlist;
+
+ outlist = outlist_head.next;
if (outlist)
{
- struct outlist *o;
+ const struct outlist *o;
prfield (outlist->field - 1, outlist->file == 1 ? line1 : line2);
for (o = outlist->next; o; o = o->next)
@@ -418,7 +437,7 @@ prjoin (struct line *line1, struct line *line2)
}
}
-/* Print the join of the files in FP1 and FP2. */
+/* Print the join of the files in FP1 and FP2. */
static void
join (fp1, fp2)
@@ -429,7 +448,7 @@ join (fp1, fp2)
struct line line;
int diff, i, j, eof1, eof2;
- /* Read the first line of each file. */
+ /* Read the first line of each file. */
initseq (&seq1);
getseq (fp1, &seq1);
initseq (&seq2);
@@ -458,7 +477,7 @@ join (fp1, fp2)
}
/* Keep reading lines from file1 as long as they continue to
- match the current line from file2. */
+ match the current line from file2. */
eof1 = 0;
do
if (!getseq (fp1, &seq1))
@@ -470,7 +489,7 @@ join (fp1, fp2)
while (!keycmp (&seq1.lines[seq1.count - 1], &seq2.lines[0]));
/* Keep reading lines from file2 as long as they continue to
- match the current line from file1. */
+ match the current line from file1. */
eof2 = 0;
do
if (!getseq (fp2, &seq2))
@@ -535,77 +554,113 @@ join (fp1, fp2)
delseq (&seq2);
}
-/* Add a field spec for field FIELD of file FILE to `outlist' and return 1,
- unless either argument is invalid; then just return 0. */
+/* Add a field spec for field FIELD of file FILE to `outlist'. */
-static int
+static void
add_field (int file, int field)
{
struct outlist *o;
- if (file < 1 || file > 2 || field < 1)
- return 0;
+ assert (file == 1 || file == 2);
+ assert (field > 0);
+
o = (struct outlist *) xmalloc (sizeof (struct outlist));
o->file = file;
o->field = field;
o->next = NULL;
- /* Add to the end of the list so the fields are in the right order. */
- if (outlist == NULL)
- outlist = o;
- else
- outlist_end->next = o;
+ /* Add to the end of the list so the fields are in the right order. */
+ outlist_end->next = o;
outlist_end = o;
-
- return 1;
}
-/* Add the comma or blank separated field spec(s) in STR to `outlist'.
- Return the number of fields added. */
+/* Convert a single field specifier string, S, to a *FILE_INDEX, *FIELD_INDEX
+ pair. If S is valid, return zero. Otherwise, give a diagnostic, don't
+ update *FILE_INDEX or *FIELD_INDEX, and return non-zero. */
static int
-add_field_list (char *str)
+decode_field_spec (const char *s, int *file_index, int *field_index)
{
- int added = 0;
- int file = -1, field = -1;
- int dot_found = 0;
+ int valid = 0;
- for (; *str; str++)
+ /* The first character must be 0, 1, or 2. */
+ switch (s[0])
{
- if (*str == ',' || ISBLANK (*str))
+ case '0':
+ if (s[1] == '\0')
{
- added += add_field (file, field);
- uni_blank.nfields = max (uni_blank.nfields, field);
- file = field = -1;
- dot_found = 0;
+ *file_index = 1;
+ *field_index = join_field_1 + 1;
+ valid = 1;
}
- else if (*str == '.')
- dot_found = 1;
- else if (ISDIGIT (*str))
- {
- if (!dot_found)
+ else
+ {
+ /* `0' must be all alone -- no `.FIELD'. */
+ error (0, 0, _("invalid field specifier: `%s'"), s);
+ }
+ break;
+
+ case '1':
+ case '2':
+ if (s[1] == '.' && s[2] != '\0')
+ {
+ strtol_error s_err;
+ long int tmp_long;
+
+ s_err = xstrtol (s + 2, NULL, 10, &tmp_long, NULL);
+ if (s_err != LONGINT_OK || tmp_long <= 0 || tmp_long > INT_MAX)
{
- if (file == -1)
- file = 0;
- file = file * 10 + *str - '0';
+ error (0, 0, _("invalid field number: `%s'"), s + 2);
}
else
{
- if (field == -1)
- field = 0;
- field = field * 10 + *str - '0';
+ *file_index = s[0] - '0';
+ *field_index = (int) tmp_long;
+ valid = 1;
}
}
- else
- return 0;
+ break;
+
+ default:
+ error (0, 0, _("invalid file number in field spec: `%s'"), s);
+ break;
}
+ return !valid;
+}
+
+/* Add the comma separated field spec(s) in C_STR to `outlist'; only `,' is
+ treated as a separator here. Return non-zero to indicate failure. */
- uni_blank.nfields = max (uni_blank.nfields, field);
- added += add_field (file, field);
- return added;
+static int
+add_field_list (const char *c_str)
+{
+ char *p, *str;
+
+ /* Make a writable copy of c_str. */
+ str = (char *) alloca (strlen (c_str) + 1);
+ strcpy (str, c_str);
+
+ p = str;
+ do
+ {
+ int invalid;
+ int file_index, field_index;
+ char *spec_item = p;
+
+ p = strchr (p, ',');
+ if (p)
+ *p++ = 0;
+ invalid = decode_field_spec (spec_item, &file_index, &field_index);
+ if (invalid)
+ return 1;
+ add_field (file_index, field_index);
+ uni_blank.nfields = max (uni_blank.nfields, field_index);
+ }
+ while (p);
+ return 0;
}
-/* Create a blank line with COUNT fields separated by tabs. */
+/* Create a blank line with COUNT fields separated by tabs. */
void
make_blank (struct line *blank, int count)
@@ -629,7 +684,7 @@ main (int argc, char **argv)
{
char *names[2];
FILE *fp1, *fp2;
- int optc, prev_optc = 0, nfiles, val;
+ int optc, prev_optc = 0, nfiles;
program_name = argv[0];
@@ -645,19 +700,25 @@ main (int argc, char **argv)
while ((optc = getopt_long_only (argc, argv, "-a:e:1:2:o:t:v:", longopts,
(int *) 0)) != EOF)
{
+ long int val;
+
switch (optc)
{
case 0:
break;
+ case 'v':
+ print_pairables = 0;
+ /* Fall through. */
+
case 'a':
- val = atoi (optarg);
+ if (xstrtol (optarg, NULL, 10, &val, NULL) != LONGINT_OK
+ || (val != 1 && val != 2))
+ error (2, 0, _("invalid field number: `%s'"), optarg);
if (val == 1)
print_unpairables_1 = 1;
- else if (val == 2)
- print_unpairables_2 = 1;
else
- error (2, 0, _("invalid file number for `-a'"));
+ print_unpairables_2 = 1;
break;
case 'e':
@@ -665,52 +726,45 @@ main (int argc, char **argv)
break;
case '1':
- val = atoi (optarg);
- if (val <= 0)
- error (2, 0, _("invalid field number for `-1'"));
- join_field_1 = val - 1;
+ if (xstrtol (optarg, NULL, 10, &val, NULL) != LONGINT_OK
+ || val <= 0 || val > INT_MAX)
+ {
+ error (2, 0, _("invalid field number for file 1: `%s'"), optarg);
+ }
+ join_field_1 = (int) val - 1;
break;
case '2':
- val = atoi (optarg);
- if (val <= 0)
- error (2, 0, _("invalid field number for `-2'"));
- join_field_2 = val - 1;
+ if (xstrtol (optarg, NULL, 10, &val, NULL) != LONGINT_OK
+ || val <= 0 || val > INT_MAX)
+ error (2, 0, _("invalid field number for file 2: `%s'"), optarg);
+ join_field_2 = (int) val - 1;
break;
case 'j':
- val = atoi (optarg);
- if (val <= 0)
- error (2, 0, _("invalid field number for `-j'"));
- join_field_1 = join_field_2 = val - 1;
+ if (xstrtol (optarg, NULL, 10, &val, NULL) != LONGINT_OK
+ || val <= 0 || val > INT_MAX)
+ error (2, 0, _("invalid field number: `%s'"), optarg);
+ join_field_1 = join_field_2 = (int) val - 1;
break;
case 'o':
- if (add_field_list (optarg) == 0)
- error (2, 0, _("invalid field list for `-o'"));
+ if (add_field_list (optarg))
+ exit (1);
break;
case 't':
tab = *optarg;
break;
- case 'v':
- val = atoi (optarg);
- if (val == 1)
- print_unpairables_1 = 1;
- else if (val == 2)
- print_unpairables_2 = 1;
- else
- error (2, 0, _("invalid file number for `-v'"));
- print_pairables = 0;
- break;
-
- case 1: /* Non-option argument. */
+ case 1: /* Non-option argument. */
if (prev_optc == 'o' && optind <= argc - 2)
{
- /* Might be continuation of args to -o. */
- if (add_field_list (optarg) > 0)
- continue; /* Don't change `prev_optc'. */
+ if (add_field_list (optarg))
+ exit (1);
+
+ /* Might be continuation of args to -o. */
+ continue; /* Don't change `prev_optc'. */
}
if (nfiles > 1)