summaryrefslogtreecommitdiff
path: root/lib/readtokens.c
diff options
context:
space:
mode:
authorJim Meyering <jim@meyering.net>1995-01-27 05:37:03 +0000
committerJim Meyering <jim@meyering.net>1995-01-27 05:37:03 +0000
commit1a41e74d32f45f7208a87aa641e12bae69278dac (patch)
tree7d69cd4b27f25132f22bda311ac3b6e83390e76f /lib/readtokens.c
parent66444bdafb3fad56f9b8c8cb888468c6e7a89683 (diff)
downloadcoreutils-1a41e74d32f45f7208a87aa641e12bae69278dac.tar.xz
.
Diffstat (limited to 'lib/readtokens.c')
-rw-r--r--lib/readtokens.c213
1 files changed, 213 insertions, 0 deletions
diff --git a/lib/readtokens.c b/lib/readtokens.c
new file mode 100644
index 000000000..c1b80a24d
--- /dev/null
+++ b/lib/readtokens.c
@@ -0,0 +1,213 @@
+/* readtokens.c -- Functions for reading tokens from an input stream.
+ Copyright (C) 1990-1991 Jim Meyering.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Written by Jim Meyering. */
+
+/* This almost supersedes xreadline stuff -- using delim="\n"
+ gives the same functionality, except that these functions
+ would never return empty lines.
+
+ To Do:
+ - To allow '\0' as a delimiter, I will have to change
+ interfaces to permit specification of delimiter-string
+ length.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#endif
+
+#if defined (STDC_HEADERS) || defined(HAVE_STRING_H)
+#include <string.h>
+/* An ANSI string.h and pre-ANSI memory.h might conflict. */
+#if !defined (STDC_HEADERS) && defined (HAVE_MEMORY_H)
+#include <memory.h>
+#endif /* not STDC_HEADERS and HAVE_MEMORY_H */
+#else /* not STDC_HEADERS and not HAVE_STRING_H */
+#include <strings.h>
+/* memory.h and strings.h conflict on some systems. */
+#endif /* not STDC_HEADERS and not HAVE_STRING_H */
+
+#include "readtokens.h"
+#include "xalloc.h"
+/* STREQ (A, B) is nonzero when A and B are the same pointer, or when
+   both are non-null and strcmp reports them equal; the first bytes
+   are compared before strcmp is called.  A and B are each expanded
+   more than once, so avoid arguments with side effects.  */
+#define STREQ(a,b) ((a) == (b) || ((a) && (b) && *(a) == *(b) \
+ && strcmp(a, b) == 0))
+
+/* Prepare TOKENBUFFER for use by readtoken: record a capacity of
+   INITIAL_TOKEN_LENGTH bytes in its size field and point its buffer
+   field at freshly xmalloc'd storage of that many bytes.  */
+
+void
+init_tokenbuffer (token_buffer *tokenbuffer)
+{
+  char *storage;
+
+  tokenbuffer->size = INITIAL_TOKEN_LENGTH;
+  storage = (char *) xmalloc (INITIAL_TOKEN_LENGTH);
+  tokenbuffer->buffer = storage;
+}
+
+/* Read the next token from STREAM into TOKENBUFFER.
+
+   Leading delimiter bytes are skipped; the token is then every byte
+   up to, but not including, the next delimiter or end of file.  On
+   return the token is in tokenbuffer->buffer, terminated by '\0' in
+   place of the delimiter, and tokenbuffer->size holds the buffer's
+   (possibly enlarged) capacity.  The buffer is grown with xrealloc
+   as needed.
+
+   DELIM is a NUL-terminated string whose bytes are the delimiters.
+   If DELIM is NULL, the delimiter set from the most recent call with
+   a non-NULL DELIM is reused; calling with NULL before any such call
+   aborts.  N_DELIM is accepted for interface compatibility, but the
+   delimiter set is taken from DELIM up to its terminating NUL, so
+   '\0' itself cannot be a delimiter.
+
+   Return the length of the token, not counting the final '\0'.  When
+   end of file is reached before any token byte is read (i.e. on the
+   call after the last token is read), return -1 and leave
+   TOKENBUFFER unmodified.
+
+   Tokens may contain NUL bytes (rely on the returned length rather
+   than strlen), and the input need not end with a delimiter.  The
+   delimiter table is kept in static storage between calls, so
+   concurrent callers would share it.  */
+
+long
+readtoken (FILE *stream, const char *delim, int n_delim,
+           token_buffer *tokenbuffer)
+{
+  static char isdelim[256];
+  static int have_delim_table = 0;
+  char *p;
+  int c, i, n;
+
+  /* The table is built from the NUL-terminated DELIM; see above.  */
+  (void) n_delim;
+
+  if (delim != NULL)
+    {
+      const char *t;
+
+      /* Rebuild the table on every call instead of caching a pointer
+         to the caller's string: that string may have been freed or
+         rewritten since the previous call, and clearing 256 bytes is
+         cheap.  Index through unsigned char so that delimiter bytes
+         >= 0x80 stay inside the table even where char is signed.  */
+      memset (isdelim, 0, sizeof isdelim);
+      for (t = delim; *t; t++)
+        isdelim[(unsigned char) *t] = 1;
+      have_delim_table = 1;
+    }
+  else if (!have_delim_table)
+    abort ();
+
+  p = tokenbuffer->buffer;
+  n = tokenbuffer->size;
+  i = 0;
+
+  /* Skip over any leading delimiters.  */
+  for (c = getc (stream); c != EOF && isdelim[c]; c = getc (stream))
+    {
+      /* empty */
+    }
+
+  /* End of input before any token byte: report it now, before the
+     buffer can be reallocated, so that TOKENBUFFER really is left
+     unmodified and never ends up holding a stale pointer.  */
+  if (c == EOF)
+    return (-1);
+
+  for (;;)
+    {
+      /* Make room for one more byte, which may be the terminator.  */
+      if (i >= n)
+        {
+          n = 3 * (n / 2 + 1);
+          p = xrealloc (p, (unsigned int) n);
+        }
+      /* The EOF test comes first so isdelim is never indexed by EOF.  */
+      if (c == EOF || isdelim[c])
+        {
+          p[i] = 0;
+          break;
+        }
+      p[i++] = c;
+      c = getc (stream);
+    }
+
+  tokenbuffer->buffer = p;
+  tokenbuffer->size = n;
+  return (i);
+}
+
+/* Read all remaining tokens from STREAM by calling readtoken
+   repeatedly; DELIM and N_DELIM are passed through to it unchanged.
+
+   Store in *TOKENS_OUT a NULL-terminated array of pointers to the
+   tokens.  Each token is a separately allocated copy with a trailing
+   '\0'.  If TOKEN_LENGTHS is not NULL, store in *TOKEN_LENGTHS a
+   parallel array holding each token's length, terminated by -1; if it
+   is NULL, the lengths array is freed before returning.
+
+   PROJECTED_N_TOKENS, when positive, sizes the initial arrays (plus
+   one slot for the terminator); otherwise room for 64 entries is
+   allocated.  The arrays are doubled whenever they fill up.
+
+   Return the number of tokens, not counting the terminating entry.
+   All storage is obtained through xmalloc/xrealloc; nothing handed
+   back through TOKENS_OUT or TOKEN_LENGTHS is freed here.
+
+   %%% Question: is it worth it to do a single
+   %%% realloc() of `tokens' just before returning? */
+
+int
+readtokens (FILE *stream, int projected_n_tokens,
+            const char *delim, int n_delim,
+            char ***tokens_out, long **token_lengths)
+{
+  token_buffer tb, *token = &tb;
+  long token_length;            /* long, to match readtoken's return type */
+  char **tokens;
+  long *lengths;
+  int sz;
+  int n_tokens;
+
+  n_tokens = 0;
+  if (projected_n_tokens > 0)
+    projected_n_tokens++;       /* add one for trailing NULL pointer */
+  else
+    projected_n_tokens = 64;
+  sz = projected_n_tokens;
+  tokens = (char **) xmalloc (sz * sizeof (char *));
+  lengths = (long *) xmalloc (sz * sizeof (long));
+
+  init_tokenbuffer (token);
+  for (;;)
+    {
+      char *tmp;
+
+      token_length = readtoken (stream, delim, n_delim, token);
+
+      /* Grow before storing, so there is always a free slot for
+         either the next token or the terminating entry.  */
+      if (n_tokens >= sz)
+        {
+          sz *= 2;
+          tokens = (char **) xrealloc (tokens, sz * sizeof (char *));
+          lengths = (long *) xrealloc (lengths, sz * sizeof (long));
+        }
+
+      if (token_length < 0)
+        {
+          /* don't increment n_tokens for NULL entry */
+          tokens[n_tokens] = NULL;
+          lengths[n_tokens] = -1;
+          break;
+        }
+
+      tmp = (char *) xmalloc ((token_length + 1) * sizeof (char));
+      /* Copy with memcpy rather than strncpy: a token may contain NUL
+         bytes, and strncpy would stop at the first one and zero-fill
+         the rest.  The +1 carries over readtoken's trailing '\0'.  */
+      memcpy (tmp, token->buffer, (size_t) token_length + 1);
+      lengths[n_tokens] = token_length;
+      tokens[n_tokens] = tmp;
+      n_tokens++;
+    }
+
+  free (token->buffer);
+  *tokens_out = tokens;
+  if (token_lengths != NULL)
+    *token_lengths = lengths;
+  else
+    free (lengths);             /* no one asked for it; don't leak it */
+  return n_tokens;
+}