summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jim Meyering <jim@meyering.net> 1993-12-23 19:11:21 +0000
committer: Jim Meyering <jim@meyering.net> 1993-12-23 19:11:21 +0000
commit: 2db44d14e45b2ae891722bac76ddb6692494fce2 (patch)
tree: 2dd6a4da191ff5b7a315c5410427c509380b77da
parent: 5fbc4e23846341ffb8f157c40175011d542ec33d (diff)
download: coreutils-2db44d14e45b2ae891722bac76ddb6692494fce2.tar.xz
merge with 1.9.1b
-rw-r--r-- old/textutils/ChangeLog 32
-rw-r--r-- old/textutils/NEWS 8
-rw-r--r-- src/cut.c 90
-rw-r--r-- src/sort.c 11
4 files changed, 102 insertions, 39 deletions
diff --git a/old/textutils/ChangeLog b/old/textutils/ChangeLog
index ceadcef77..ba143db5f 100644
--- a/old/textutils/ChangeLog
+++ b/old/textutils/ChangeLog
@@ -1,3 +1,35 @@
+Wed Dec 22 18:52:44 1993 Jim Meyering (meyering@comco.com)
+
+ * memcmp.c: Use the latest version from the GNU C library.
+
+ * cut.c [ADD_RANGE_PAIR]: New macro.
+ (set_fields): Collect the list of all selected ranges before
+ allocating and initializing the boolean lookup table.
+ (cut_bytes, cut_fields): Complete rewrite. Avoid copying
+ into buffer whenever possible. Properly handle input without
+ trailing newline.
+ (getstr): New function. Copied from getline.c, but with minor changes.
+
+ * sort.c (main): Properly handle -Tdir.
+ Before, `sort -T/var/tmp' gave `sort: unrecognized option `-v''.
+ Reported by Kristoffer Rose (kris@diku.dk).
+
+ * cut.c (main): Give separate errors for `-s without -f'
+ and `-d without -f'.
+ (main): Now -d '' means `use the NUL byte as the delimiter'.
+ Before, it got an error.
+
+Mon Dec 20 23:29:30 1993 Jim Meyering (meyering@comco.com)
+
+ * configure.in [test for 8-bit clean memcmp]: Add a test to detect
+ losing memcmp from SunOS4.1.x.
+
+Sat Dec 18 01:12:24 1993 Jim Meyering (meyering@comco.com)
+
+ * configure.in (AC_OUTPUT): Put `touch stamp-config' in second arg
+ so it goes in config.status. This eliminates unnecessary second run
+ of configure.
+
Thu Dec 02 23:53:03 1993 Jim Meyering (meyering@comco.com)
* configure.in (AC_HAVE_FUNCS): Add isascii.
diff --git a/old/textutils/NEWS b/old/textutils/NEWS
index 4cd0478d7..df71b4645 100644
--- a/old/textutils/NEWS
+++ b/old/textutils/NEWS
@@ -1,3 +1,11 @@
+User-visible changes in release 1.10
+* cut interprets -d '' to mean `use the NUL byte as the delimiter' rather
+ than reporting that no delimiter was specified.
+* `echo a:b:c: | cut -d: -f3,4' prints `c:'. Before it printed just `c'.
+* cut has been rewritten, is markedly faster for large inputs, and passes a
+ fairly large test suite.
+* sort properly handles the argument to the -T option.
+
Major changes in release 1.9.1:
* cut no longer ignores the last line of input when that line lacks a
trailing newline character
diff --git a/src/cut.c b/src/cut.c
index f6f6bfae6..36080e62e 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -22,6 +22,8 @@
POSIX changes, bug fixes, long-named options, and cleanup
by David MacKenzie <djm@gnu.ai.mit.edu>.
+ Rewrite cut_fields and cut_bytes -- Jim Meyering (meyering@comco.com).
+
Options:
--bytes=byte-list
-b byte-list Print only the bytes in positions listed
@@ -73,8 +75,7 @@
#include <stdio.h>
-/* FIXME */
-/* #define NDEBUG */
+#define NDEBUG
#include <assert.h>
#include <getopt.h>
@@ -90,11 +91,9 @@
} \
while (0)
-struct range_pair
- {
- int lo;
- int hi;
- };
+/* Append LOW, HIGH to the list RP of range pairs, allocating additional
+ space if necessary. Update local variable N_RP. When allocating,
+ update global variable N_RP_ALLOCATED. */
#define ADD_RANGE_PAIR(rp, low, high) \
do \
@@ -111,14 +110,26 @@ struct range_pair
} \
while (0)
+struct range_pair
+ {
+ int lo;
+ int hi;
+ };
+
char *xmalloc ();
char *xrealloc ();
void error ();
-/* FIXME: Comment. */
+/* This buffer is used to support the semantics of the -s option
+ (or lack of same) when the specified field list includes (does
+ not include) the first field. In both of those cases, the entire
+ first field must be read into this buffer to determine whether it
+ is followed by a delimiter or a newline before any of it may be
+ output. Otherwise, cut_fields can do the job without using this
+ buffer. */
static char *field_1_buffer;
-/* FIXME: Comment. */
+/* The number of bytes allocated for FIELD_1_BUFFER. */
static int field_1_bufsize;
/* The largest field or byte index used as an endpoint of a closed
@@ -223,7 +234,13 @@ With no FILE, or when FILE is -, read standard input.\n\
exit (status);
}
-/* Begin ------------ from getline.c */
+/* The following function was copied from getline.c, but with these changes:
+ - Read up to and including a newline or TERMINATOR, whichever comes first.
+ The original does not treat newline specially.
+ - Remove unused argument, OFFSET.
+ - Use xmalloc and xrealloc instead of malloc and realloc.
+ - Declare this function static. */
+
/* Always add at least this many bytes when extending the buffer. */
#define MIN_CHUNK 64
@@ -233,7 +250,7 @@ With no FILE, or when FILE is -, read standard input.\n\
xrealloc'd as necessary. Return the number of characters read (not
including the null terminator), or -1 on error or EOF. */
-int
+static int
getstr (lineptr, n, stream, terminator)
char **lineptr;
int *n;
@@ -318,12 +335,19 @@ print_kth (k)
to its starting index. FIELDSTR should be composed of one or more
numbers or ranges of numbers, separated by blanks or commas.
Incomplete ranges may be given: `-m' means `1-m'; `n-' means `n'
- through end of line or last field. Return non-zero if FIELDSTR
- contains at least one field specification, zero otherwise. */
+ through end of line. Return non-zero if FIELDSTR contains at least
+ one field specification, zero otherwise. */
+
+/* FIXME-someday: What if the user wants to cut out the 1,000,000-th field
+ of some huge input file? This function shouldn't have to allocate a table
+ of a million ints just so we can test every field < 10^6 with an array
+ dereference. Instead, consider using a dynamic hash table. It would be
+ simpler and nearly as good a solution to use a 32K x 4-byte table with
+ one bit per field index instead of a whole `int' per index. */
static int
set_fields (fieldstr)
- char *fieldstr;
+ const char *fieldstr;
{
int initial = 1; /* Value of first number in a range. */
int dash_found = 0; /* Nonzero if a '-' is found in this field. */
@@ -337,8 +361,7 @@ set_fields (fieldstr)
int i;
n_rp = 0;
- /* FIXME: use 1 only for testing. */
- n_rp_allocated = 1;
+ n_rp_allocated = 16;
rp = (struct range_pair *) xmalloc (n_rp_allocated * sizeof (*rp));
/* Collect and store in RP the range end points.
@@ -474,18 +497,17 @@ set_fields (fieldstr)
return field_found;
}
-/* Print the file open for reading on stream STREAM
- with the bytes marked `FIELD_OMIT' in `fields' removed from each line. */
+/* Read from stream STREAM, printing to standard output any selected bytes. */
static void
cut_bytes (stream)
FILE *stream;
{
- int n_bytes; /* Number of chars in the line so far. */
+ int byte_idx; /* Number of chars in the line so far. */
int printed_from_curr_line;
printed_from_curr_line = 0;
- n_bytes = 0;
+ byte_idx = 0;
while (1)
{
register int c; /* Each character from the file. */
@@ -499,23 +521,21 @@ cut_bytes (stream)
if (c == EOF)
break;
printed_from_curr_line = 0;
- n_bytes = 0;
+ byte_idx = 0;
}
else
{
- ++n_bytes;
- if (print_kth (n_bytes))
+ ++byte_idx;
+ if (print_kth (byte_idx))
{
printed_from_curr_line = 1;
putchar (c);
}
}
- /* WORKING */
}
}
-/* Read from stream STREAM, printing to standard output any selected fields.
- FIXME: comment. */
+/* Read from stream STREAM, printing to standard output any selected fields. */
static void
cut_fields (FILE *stream)
@@ -523,7 +543,7 @@ cut_fields (FILE *stream)
int c;
int field_idx;
int found_any_selected_field;
- int first_field_special;
+ int buffer_first_field;
found_any_selected_field = 0;
field_idx = 1;
@@ -534,11 +554,11 @@ cut_fields (FILE *stream)
and the first field has been selected, or if non-delimited lines
must be suppressed and the first field has *not* been selected.
That is because a non-delimited line has exactly one field. */
- first_field_special = (suppress_non_delimited ^ !print_kth (1));
+ buffer_first_field = (suppress_non_delimited ^ !print_kth (1));
while (1)
{
- if (field_idx == 1 && first_field_special)
+ if (field_idx == 1 && buffer_first_field)
{
int len;
@@ -715,9 +735,8 @@ main (argc, argv)
case 'd':
/* New delimiter. */
- if (optarg[0] == '\0')
- FATAL_ERROR ("missing delimiter argument");
- if (optarg[1] != '\0')
+ /* Interpret -d '' to mean `use the NUL byte as the delimiter.' */
+ if (optarg[0] != '\0' && optarg[1] != '\0')
FATAL_ERROR ("the delimiter must be a single character");
delim = optarg[0];
break;
@@ -746,10 +765,13 @@ main (argc, argv)
if (operating_mode == undefined_mode)
FATAL_ERROR ("you must specify a list of bytes, characters, or fields");
- /* FIXME: what is this? */
- if ((suppress_non_delimited || delim != '\0') && operating_mode != field_mode)
+ if (delim != '\0' && operating_mode != field_mode)
FATAL_ERROR ("a delimiter may be specified only when operating on fields");
+ if (suppress_non_delimited && operating_mode != field_mode)
+ FATAL_ERROR ("suppressing non-delimited lines makes sense\n\
+\tonly when operating on fields");
+
if (delim == '\0')
delim = '\t';
diff --git a/src/sort.c b/src/sort.c
index df6e74e54..166d8db95 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -1667,13 +1667,14 @@ main (argc, argv)
case 'T':
if (s[1])
temp_file_prefix = ++s;
- else if (i < argc - 1)
+ else
{
- temp_file_prefix = argv[++i];
- goto outer;
+ if (i < argc - 1)
+ temp_file_prefix = argv[++i];
+ else
+ error (2, 0, "option `-T' requires an argument");
}
- else
- error (2, 0, "option `-T' requires an argument");
+ goto outer;
break;
case 'u':
unique = 1;