diff options
-rw-r--r-- | NEWS | 2 | ||||
-rw-r--r-- | src/join.c | 168 |
2 files changed, 90 insertions, 80 deletions
@@ -31,6 +31,8 @@ GNU coreutils NEWS -*- outline -*- HP-UX 11, Tru64, AIX, IRIX 6.5, and Cygwin, "ls -l" now displays the presence of an ACL on a file via a '+' sign after the mode, and "cp -p" copies ACLs. + join has significantly better performance due to better memory management + od now aligns fields across lines when printing multiple -t specifiers, and no longer prints fields that resulted entirely from padding the input out to the least common multiple width. diff --git a/src/join.c b/src/join.c index 4c506d1ac..041e2eb53 100644 --- a/src/join.c +++ b/src/join.c @@ -40,6 +40,12 @@ #define join system_join +#define SWAPLINES(a, b) do { \ + struct line *tmp = a; \ + a = b; \ + b = tmp; \ +} while (0); + /* An element of the list identifying which fields to print for each output line. */ struct outlist @@ -76,11 +82,16 @@ struct seq { size_t count; /* Elements used in `lines'. */ size_t alloc; /* Elements allocated in `lines'. */ - struct line *lines; + struct line **lines; }; /* The previous line read from each file. */ -static struct line *prevline[2]; +static struct line *prevline[2] = {NULL, NULL}; + +/* This provides an extra line buffer for each file. We need these if we + try to read two consecutive lines into the same buffer, since we don't + want to overwrite the previous buffer before we check order. */ +static struct line *spareline[2] = {NULL, NULL}; /* True if the LC_COLLATE locale is hard. */ static bool hard_LC_COLLATE; @@ -257,33 +268,6 @@ xfields (struct line *line) extract_field (line, ptr, lim - ptr); } -static struct line * -dup_line (const struct line *old) -{ - struct line *newline = xmalloc (sizeof *newline); - size_t i; - - /* Duplicate the buffer. */ - initbuffer (&newline->buf); - newline->buf.buffer = xmalloc (old->buf.size); - newline->buf.size = old->buf.size; - memcpy (newline->buf.buffer, old->buf.buffer, old->buf.length); - newline->buf.length = old->buf.length; - - /* Duplicate the field positions. */ - newline->fields = xnmalloc (old->nfields_allocated, sizeof *newline->fields); - newline->nfields = old->nfields; - newline->nfields_allocated = old->nfields_allocated; - - for (i = 0; i < old->nfields; i++) - { - newline->fields[i].len = old->fields[i].len; - newline->fields[i].beg = newline->buf.buffer + (old->fields[i].beg - - old->buf.buffer); - } - return newline; -} - static void freeline (struct line *line) { @@ -390,49 +374,69 @@ check_order (const struct line *prev, } } +static inline void +reset_line (struct line *line) +{ + line->nfields = 0; +} + +static struct line * +init_linep (struct line **linep) +{ + struct line *line = xmalloc (sizeof *line); + memset (line, '\0', sizeof *line); + *linep = line; + return line; +} + /* Read a line from FP into LINE and split it into fields. Return true if successful. */ static bool -get_line (FILE *fp, struct line *line, int which) +get_line (FILE *fp, struct line **linep, int which) { - initbuffer (&line->buf); + struct line *line = *linep; + + if (line == prevline[which - 1]) + { + SWAPLINES (line, spareline[which - 1]); + *linep = line; + } + + if (line) + reset_line (line); + else + line = init_linep (linep); if (! readlinebuffer (&line->buf, fp)) { if (ferror (fp)) error (EXIT_FAILURE, errno, _("read error")); - free (line->buf.buffer); - line->buf.buffer = NULL; + freeline (line); return false; } - line->nfields_allocated = 0; - line->nfields = 0; - line->fields = NULL; xfields (line); if (prevline[which - 1]) - { - check_order (prevline[which - 1], line, which); - freeline (prevline[which - 1]); - free (prevline[which - 1]); - } - prevline[which - 1] = dup_line (line); + check_order (prevline[which - 1], line, which); + + prevline[which - 1] = line; return true; } static void -free_prevline (void) +free_spareline (void) { size_t i; - for (i = 0; i < ARRAY_CARDINALITY (prevline); i++) + for (i = 0; i < ARRAY_CARDINALITY (spareline); i++) { - if (prevline[i]) - freeline (prevline[i]); - free (prevline[i]); - prevline[i] = NULL; + if (spareline[i]) + { + freeline (spareline[i]); + free (spareline[i]); + } } } @@ -450,7 +454,12 @@ static bool getseq (FILE *fp, struct seq *seq, int whichfile) { if (seq->count == seq->alloc) - seq->lines = X2NREALLOC (seq->lines, &seq->alloc); + { + size_t i; + seq->lines = X2NREALLOC (seq->lines, &seq->alloc); + for (i = seq->count; i < seq->alloc; i++) + seq->lines[i] = NULL; + } if (get_line (fp, &seq->lines[seq->count], whichfile)) { @@ -466,10 +475,8 @@ static bool advance_seq (FILE *fp, struct seq *seq, bool first, int whichfile) { if (first) - { - freeline (&seq->lines[0]); - seq->count = 0; - } + seq->count = 0; + return getseq (fp, seq, whichfile); } @@ -477,9 +484,13 @@ static void delseq (struct seq *seq) { size_t i; - for (i = 0; i < seq->count; i++) - if (seq->lines[i].buf.buffer) - freeline (&seq->lines[i]); + for (i = 0; i < seq->alloc; i++) + if (seq->lines[i]) + { + if (seq->lines[i]->buf.buffer) + freeline (seq->lines[i]); + free (seq->lines[i]); + } free (seq->lines); } @@ -592,10 +603,12 @@ static void join (FILE *fp1, FILE *fp2) { struct seq seq1, seq2; - struct line line; + struct line **linep = xmalloc (sizeof *linep); int diff; bool eof1, eof2, checktail; + *linep = NULL; + /* Read the first line of each file. */ initseq (&seq1); getseq (fp1, &seq1, 1); @@ -605,12 +618,12 @@ join (FILE *fp1, FILE *fp2) while (seq1.count && seq2.count) { size_t i; - diff = keycmp (&seq1.lines[0], &seq2.lines[0], + diff = keycmp (seq1.lines[0], seq2.lines[0], join_field_1, join_field_2); if (diff < 0) { if (print_unpairables_1) - prjoin (&seq1.lines[0], &uni_blank); + prjoin (seq1.lines[0], &uni_blank); advance_seq (fp1, &seq1, true, 1); seen_unpairable = true; continue; @@ -618,7 +631,7 @@ join (FILE *fp1, FILE *fp2) if (diff > 0) { if (print_unpairables_2) - prjoin (&uni_blank, &seq2.lines[0]); + prjoin (&uni_blank, seq2.lines[0]); advance_seq (fp2, &seq2, true, 2); seen_unpairable = true; continue; @@ -634,7 +647,7 @@ join (FILE *fp1, FILE *fp2) ++seq1.count; break; } - while (!keycmp (&seq1.lines[seq1.count - 1], &seq2.lines[0], + while (!keycmp (seq1.lines[seq1.count - 1], seq2.lines[0], join_field_1, join_field_2)); /* Keep reading lines from file2 as long as they continue to @@ -647,7 +660,7 @@ join (FILE *fp1, FILE *fp2) ++seq2.count; break; } - while (!keycmp (&seq1.lines[0], &seq2.lines[seq2.count - 1], + while (!keycmp (seq1.lines[0], seq2.lines[seq2.count - 1], join_field_1, join_field_2)); if (print_pairables) @@ -656,25 +669,21 @@ join (FILE *fp1, FILE *fp2) { size_t j; for (j = 0; j < seq2.count - 1; ++j) - prjoin (&seq1.lines[i], &seq2.lines[j]); + prjoin (seq1.lines[i], seq2.lines[j]); } } - for (i = 0; i < seq1.count - 1; ++i) - freeline (&seq1.lines[i]); if (!eof1) { - seq1.lines[0] = seq1.lines[seq1.count - 1]; + SWAPLINES (seq1.lines[0], seq1.lines[seq1.count - 1]); seq1.count = 1; } else seq1.count = 0; - for (i = 0; i < seq2.count - 1; ++i) - freeline (&seq2.lines[i]); if (!eof2) { - seq2.lines[0] = seq2.lines[seq2.count - 1]; + SWAPLINES (seq2.lines[0], seq2.lines[seq2.count - 1]); seq2.count = 1; } else @@ -694,14 +703,12 @@ join (FILE *fp1, FILE *fp2) if ((print_unpairables_1 || checktail) && seq1.count) { if (print_unpairables_1) - prjoin (&seq1.lines[0], &uni_blank); - freeline (&seq1.lines[0]); + prjoin (seq1.lines[0], &uni_blank); seen_unpairable = true; - while (get_line (fp1, &line, 1)) + while (get_line (fp1, linep, 1)) { if (print_unpairables_1) - prjoin (&line, &uni_blank); - freeline (&line); + prjoin (*linep, &uni_blank); if (issued_disorder_warning[0] && !print_unpairables_1) break; } @@ -710,19 +717,20 @@ join (FILE *fp1, FILE *fp2) if ((print_unpairables_2 || checktail) && seq2.count) { if (print_unpairables_2) - prjoin (&uni_blank, &seq2.lines[0]); - freeline (&seq2.lines[0]); + prjoin (&uni_blank, seq2.lines[0]); seen_unpairable = true; - while (get_line (fp2, &line, 2)) + while (get_line (fp2, linep, 2)) { if (print_unpairables_2) - prjoin (&uni_blank, &line); - freeline (&line); + prjoin (&uni_blank, *linep); if (issued_disorder_warning[1] && !print_unpairables_2) break; } } + free (*linep); + + free (linep); delseq (&seq1); delseq (&seq2); } @@ -938,7 +946,7 @@ main (int argc, char **argv) hard_LC_COLLATE = hard_locale (LC_COLLATE); atexit (close_stdout); - atexit (free_prevline); + atexit (free_spareline); print_pairables = true; seen_unpairable = false; |