summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- NEWS 6
-rw-r--r-- doc/coreutils.texi 19
-rw-r--r-- src/join.c 88
-rwxr-xr-x tests/misc/join 20
4 files changed, 96 insertions, 37 deletions
diff --git a/NEWS b/NEWS
index e420cd93f..5b389f654 100644
--- a/NEWS
+++ b/NEWS
@@ -16,6 +16,12 @@ GNU coreutils NEWS -*- outline -*-
uniq -f NUM no longer tries to process fields after end of line.
[bug introduced in coreutils-7.0]
+** New features
+
+ join now supports -o 'auto' which will automatically infer the
+ output format from the first line in each file, to ensure
+ the same number of fields are output for each line.
+
* Noteworthy changes in release 8.9 (2011-01-04) [stable]
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index ebe379eb6..d6e98c6d6 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -5701,8 +5701,8 @@ Do not check that both input files are in sorted order. This is the default.
@item -e @var{string}
@opindex -e
-Replace those output fields that are missing in the input with
-@var{string}.
+Replace those output fields that are missing in the input with @var{string}.
+I.e., missing fields specified with the @option{-1}, @option{-2}, @option{-j} or @option{-o} options.
@item --header
@opindex --header
@@ -5733,10 +5733,17 @@ Join on field @var{field} (a positive integer) of file 2.
Equivalent to @option{-1 @var{field} -2 @var{field}}.
@item -o @var{field-list}
-Construct each output line according to the format in @var{field-list}.
-Each element in @var{field-list} is either the single character @samp{0} or
-has the form @var{m.n} where the file number, @var{m}, is @samp{1} or
-@samp{2} and @var{n} is a positive field number.
+@itemx -o auto
+If the keyword @samp{auto} is specified, infer the output format from
+the first line in each file. This is the same as the default output format
+but also ensures the same number of fields are output for each line.
+Missing fields are replaced with the string given by the @option{-e} option,
+and extra fields are discarded.
+
+Otherwise, construct each output line according to the format in
+@var{field-list}. Each element in @var{field-list} is either the single
+character @samp{0} or has the form @var{m.n} where the file number, @var{m},
+is @samp{1} or @samp{2} and @var{n} is a positive field number.
A field specification of @samp{0} denotes the join field.
In most cases, the functionality of the @samp{0} field spec
diff --git a/src/join.c b/src/join.c
index afda5a16e..bf7e908af 100644
--- a/src/join.c
+++ b/src/join.c
@@ -112,6 +112,13 @@ static bool issued_disorder_warning[2];
/* Empty output field filler. */
static char const *empty_filler;
+/* Whether to ensure the same number of fields are output from each line. */
+static bool autoformat;
+/* The number of fields to output for each line.
+ Only significant when autoformat is true. */
+static size_t autocount_1;
+static size_t autocount_2;
+
/* Field to join on; SIZE_MAX means they haven't been determined yet. */
static size_t join_field_1 = SIZE_MAX;
static size_t join_field_2 = SIZE_MAX;
@@ -210,7 +217,8 @@ else fields are separated by CHAR. Any FIELD is a field number counted\n\
from 1. FORMAT is one or more comma or blank separated specifications,\n\
each being `FILENUM.FIELD' or `0'. Default FORMAT outputs the join field,\n\
the remaining fields from FILE1, the remaining fields from FILE2, all\n\
-separated by CHAR.\n\
+separated by CHAR. If FORMAT is the keyword 'auto', then the first\n\
+line of each file determines the number of fields output for each line.\n\
\n\
Important: FILE1 and FILE2 must be sorted on the join fields.\n\
E.g., use ` sort -k 1b,1 ' if `join' has no options,\n\
@@ -527,6 +535,27 @@ prfield (size_t n, struct line const *line)
fputs (empty_filler, stdout);
}
+/* Output the fields of LINE other than JOIN_FIELD, each preceded by the output separator; if autoformat is set, AUTOCOUNT replaces LINE->nfields as the field count. */
+
+static void
+prfields (struct line const *line, size_t join_field, size_t autocount)
+{
+ size_t i;
+ size_t nfields = autoformat ? autocount : line->nfields; /* field count to emit: AUTOCOUNT under autoformat, else this line's own count */
+ char output_separator = tab < 0 ? ' ' : tab; /* a blank when tab is negative, otherwise the tab character itself */
+
+ for (i = 0; i < join_field && i < nfields; ++i) /* fields positioned before the join field */
+ {
+ putchar (output_separator);
+ prfield (i, line);
+ }
+ for (i = join_field + 1; i < nfields; ++i) /* fields positioned after the join field */
+ {
+ putchar (output_separator);
+ prfield (i, line);
+ }
+}
+
/* Print the join of LINE1 and LINE2. */
static void
@@ -534,6 +563,8 @@ prjoin (struct line const *line1, struct line const *line2)
{
const struct outlist *outlist;
char output_separator = tab < 0 ? ' ' : tab;
+ size_t field;
+ struct line const *line;
outlist = outlist_head.next;
if (outlist)
@@ -543,9 +574,6 @@ prjoin (struct line const *line1, struct line const *line2)
o = outlist;
while (1)
{
- size_t field;
- struct line const *line;
-
if (o->file == 0)
{
if (line1 == &uni_blank)
@@ -574,37 +602,24 @@ prjoin (struct line const *line1, struct line const *line2)
}
else
{
- size_t i;
-
if (line1 == &uni_blank)
{
- struct line const *t;
- t = line1;
- line1 = line2;
- line2 = t;
+ line = line2;
+ field = join_field_2;
}
- prfield (join_field_1, line1);
- for (i = 0; i < join_field_1 && i < line1->nfields; ++i)
- {
- putchar (output_separator);
- prfield (i, line1);
- }
- for (i = join_field_1 + 1; i < line1->nfields; ++i)
+ else
{
- putchar (output_separator);
- prfield (i, line1);
+ line = line1;
+ field = join_field_1;
}
- for (i = 0; i < join_field_2 && i < line2->nfields; ++i)
- {
- putchar (output_separator);
- prfield (i, line2);
- }
- for (i = join_field_2 + 1; i < line2->nfields; ++i)
- {
- putchar (output_separator);
- prfield (i, line2);
- }
+ /* Output the join field. */
+ prfield (field, line);
+
+ /* Output other fields. */
+ prfields (line1, join_field_1, autocount_1);
+ prfields (line2, join_field_2, autocount_2);
+
putchar ('\n');
}
}
@@ -627,6 +642,12 @@ join (FILE *fp1, FILE *fp2)
initseq (&seq2);
getseq (fp2, &seq2, 2);
+ if (autoformat)
+ {
+ autocount_1 = seq1.count ? seq1.lines[0]->nfields : 0;
+ autocount_2 = seq2.count ? seq2.lines[0]->nfields : 0;
+ }
+
if (join_header_lines && seq1.count && seq2.count)
{
prjoin (seq1.lines[0], seq2.lines[0]);
@@ -1037,8 +1058,13 @@ main (int argc, char **argv)
break;
case 'o':
- add_field_list (optarg);
- optc_status = MIGHT_BE_O_ARG;
+ if (STREQ (optarg, "auto"))
+ autoformat = true;
+ else
+ {
+ add_field_list (optarg);
+ optc_status = MIGHT_BE_O_ARG;
+ }
break;
case 't':
diff --git a/tests/misc/join b/tests/misc/join
index 3696a0381..3cf278b9f 100755
--- a/tests/misc/join
+++ b/tests/misc/join
@@ -127,6 +127,26 @@ my @tv = (
# From David Dyck
['9a', '', [" a 1\n b 2\n", " a Y\n b Z\n"], "a 1 Y\nb 2 Z\n", 0],
+# -o 'auto'
+['10a', '-a1 -a2 -e . -o auto',
+ ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"],
+ "a 1 2 3 4\nb 1 . 3 4\nc . . 3 4\nd 1 2 . .\n", 0],
+['10b', '-a1 -a2 -j3 -e . -o auto',
+ ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"],
+ "2 a 1 . .\n. b 1 . .\n2 d 1 . .\n4 . . a 3\n4 . . b 3\n4 . . c 3\n"],
+['10c', '-a1 -1 1 -2 4 -e. -o auto',
+ ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"],
+ "a 1 2 . . .\nb 1 . . . .\nd 1 2 . . .\n"],
+['10d', '-a2 -1 1 -2 4 -e. -o auto',
+ ["a 1 2\nb 1\nd 1 2\n", "a 3 4\nb 3 4\nc 3 4\n"],
+ ". . . a 3 4\n. . . b 3 4\n. . . c 3 4\n"],
+['10e', '-o auto',
+ ["a 1 2\nb 1 2 discard\n", "a 3 4\nb 3 4 discard\n"],
+ "a 1 2 3 4\nb 1 2 3 4\n"],
+['10f', '-t, -o auto',
+ ["a,1,,2\nb,1,2\n", "a,3,4\nb,3,4\n"],
+ "a,1,,2,3,4\nb,1,2,,3,4\n"],
+
# From Tim Smithers: fixed in 1.22l
['trailing-sp', '-t: -1 1 -2 1', ["a:x \n", "a:y \n"], "a:x :y \n", 0],