diff options
author | Jim Meyering <jim@meyering.net> | 1995-01-27 05:37:03 +0000 |
---|---|---|
committer | Jim Meyering <jim@meyering.net> | 1995-01-27 05:37:03 +0000 |
commit | 1a41e74d32f45f7208a87aa641e12bae69278dac (patch) | |
tree | 7d69cd4b27f25132f22bda311ac3b6e83390e76f | |
parent | 66444bdafb3fad56f9b8c8cb888468c6e7a89683 (diff) | |
download | coreutils-1a41e74d32f45f7208a87aa641e12bae69278dac.tar.xz |
.
-rw-r--r-- | lib/readtokens.c | 213 | ||||
-rw-r--r-- | lib/readtokens.h | 29 |
2 files changed, 242 insertions, 0 deletions
diff --git a/lib/readtokens.c b/lib/readtokens.c
new file mode 100644
index 000000000..c1b80a24d
--- /dev/null
+++ b/lib/readtokens.c
@@ -0,0 +1,213 @@
+/* readtokens.c  -- Functions for reading tokens from an input stream.
+   Copyright (C) 1990-1991 Jim Meyering.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+   Written by Jim Meyering. */
+
+/* This almost supersedes xreadline stuff -- using delim="\n"
+   gives the same functionality, except that these functions
+   would never return empty lines.
+
+   Delimiters are passed as an array of `n_delim' bytes rather than
+   as a NUL-terminated string, so '\0' may itself be a delimiter.  */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#endif
+
+#if defined (STDC_HEADERS) || defined(HAVE_STRING_H)
+#include <string.h>
+/* An ANSI string.h and pre-ANSI memory.h might conflict.  */
+#if !defined (STDC_HEADERS) && defined (HAVE_MEMORY_H)
+#include <memory.h>
+#endif /* not STDC_HEADERS and HAVE_MEMORY_H */
+#else /* not STDC_HEADERS and not HAVE_STRING_H */
+#include <strings.h>
+/* memory.h and strings.h conflict on some systems.  */
+#endif /* not STDC_HEADERS and not HAVE_STRING_H */
+
+#include "readtokens.h"
+#include "xalloc.h"
+
+#define STREQ(a,b) ((a) == (b) || ((a) && (b) && *(a) == *(b) \
+                     && strcmp(a, b) == 0))
+
+/* Initialize a tokenbuffer.  The buffer comes from xmalloc; release
+   it with free (tokenbuffer->buffer) when done.  */
+
+void
+init_tokenbuffer (token_buffer *tokenbuffer)
+{
+  tokenbuffer->size = INITIAL_TOKEN_LENGTH;
+  tokenbuffer->buffer = (char *) xmalloc (INITIAL_TOKEN_LENGTH);
+}
+
+/* Read a token from `stream' into `tokenbuffer'.
+   `delim' points to `n_delim' delimiter bytes.  If `delim' is NULL,
+   the delimiters of the previous call are reused; it is a fatal
+   error (abort) if there was no previous call.
+   Leading delimiters are skipped, so no token is ever empty.
+   Upon return, the token is in tokenbuffer->buffer and
+   has a trailing '\0' instead of the original delimiter.
+   The function value is the length of the token not including
+   the final '\0'.  When EOF is reached (i.e. on the call
+   after the last token is read), -1 is returned and the contents
+   of tokenbuffer->buffer are left alone.
+
+   This function will work properly on lines containing NUL bytes
+   and on files that aren't newline-terminated.  */
+
+long
+readtoken (FILE *stream, const char *delim, int n_delim,
+           token_buffer *tokenbuffer)
+{
+  /* The table outlives the call so that a NULL `delim' can reuse it.
+     It is rebuilt from the caller's bytes whenever `delim' is given;
+     no pointer into the caller's storage is kept, because that
+     storage need not survive until the next call.  */
+  static char isdelim[256];
+  static int have_delim = 0;
+  char *p;
+  long n, len;
+  int c, i;
+
+  if (delim != NULL)
+    {
+      memset (isdelim, 0, sizeof isdelim);
+      /* Index through unsigned char: plain char may be signed, and a
+         negative value must not reach outside the table.  */
+      for (i = 0; i < n_delim; i++)
+        isdelim[(unsigned char) delim[i]] = 1;
+      have_delim = 1;
+    }
+  else if (!have_delim)
+    abort ();
+
+  p = tokenbuffer->buffer;
+  n = tokenbuffer->size;
+  len = 0;
+
+  /* Skip over any leading delimiters.  */
+  for (c = getc (stream); c != EOF && isdelim[c]; c = getc (stream))
+    continue;
+
+  for (;;)
+    {
+      if (len >= n)
+        {
+          /* Grow by about half; the `+ 1' guarantees progress even
+             when starting from a zero-sized buffer.  */
+          n = 3 * (n / 2 + 1);
+          p = (char *) xrealloc (p, (size_t) n);
+        }
+      if (c == EOF || isdelim[c])
+        break;
+      p[len++] = c;
+      c = getc (stream);
+    }
+
+  /* xrealloc may have moved the buffer, so record it on every path,
+     including the EOF return below.  */
+  tokenbuffer->buffer = p;
+  tokenbuffer->size = n;
+
+  /* Leading delimiters were skipped, so a zero length can only mean
+     that EOF was hit before any token byte was read.  */
+  if (len == 0)
+    return -1;
+
+  p[len] = 0;
+  return len;
+}
+
+/* Read every remaining token from `stream'.  A NULL-terminated array
+   of pointers to NUL-terminated copies of the tokens is stored in
+   *tokens_out, and the number of tokens is returned as the value of
+   the function.  If `token_lengths' is non-NULL, a parallel array of
+   token lengths, terminated by -1, is stored in *token_lengths.
+   `projected_n_tokens' sizes the initial arrays; if it is not
+   positive, room for 64 entries is allocated.
+   `delim' and `n_delim' are passed on to readtoken.
+   All storage is obtained through calls to xmalloc and xrealloc;
+   none of what is handed back is freed here, so releasing the
+   tokens and the arrays is left to the caller.
+
+   %%% Question: is it worth it to do a single
+   %%% realloc() of `tokens' just before returning?  */
+
+int
+readtokens (FILE *stream, int projected_n_tokens,
+            const char *delim, int n_delim,
+            char ***tokens_out, long **token_lengths)
+{
+  token_buffer tb, *token = &tb;
+  long token_length;
+  char **tokens;
+  long *lengths;
+  int sz;
+  int n_tokens;
+
+  n_tokens = 0;
+  if (projected_n_tokens > 0)
+    projected_n_tokens++;       /* add one for trailing NULL pointer */
+  else
+    projected_n_tokens = 64;
+  sz = projected_n_tokens;
+  tokens = (char **) xmalloc (sz * sizeof (char *));
+  lengths = (long *) xmalloc (sz * sizeof (long));
+
+  init_tokenbuffer (token);
+  for (;;)
+    {
+      char *tmp;
+      token_length = readtoken (stream, delim, n_delim, token);
+      if (n_tokens >= sz)
+        {
+          sz *= 2;
+          tokens = (char **) xrealloc (tokens, sz * sizeof (char *));
+          lengths = (long *) xrealloc (lengths, sz * sizeof (long));
+        }
+
+      if (token_length < 0)
+        {
+          /* don't increment n_tokens for NULL entry */
+          tokens[n_tokens] = NULL;
+          lengths[n_tokens] = -1;
+          break;
+        }
+      /* Copy with memcpy rather than strncpy: a token may contain
+         NUL bytes, and strncpy would stop copying at the first.  */
+      tmp = (char *) xmalloc ((size_t) token_length + 1);
+      memcpy (tmp, token->buffer, (size_t) token_length + 1);
+      lengths[n_tokens] = token_length;
+      tokens[n_tokens] = tmp;
+      n_tokens++;
+    }
+
+  free (token->buffer);
+  *tokens_out = tokens;
+  if (token_lengths != NULL)
+    *token_lengths = lengths;
+  else
+    free (lengths);             /* nobody asked for it; don't leak */
+  return n_tokens;
+}
diff --git a/lib/readtokens.h b/lib/readtokens.h
new file mode 100644
index 000000000..101faa42e
--- /dev/null
+++ b/lib/readtokens.h
@@ -0,0 +1,29 @@
+#ifndef H_READTOKENS_H
+#define H_READTOKENS_H
+
+#include <stdio.h>              /* FILE, used by the prototypes below */
+
+#ifndef INITIAL_TOKEN_LENGTH
+#define INITIAL_TOKEN_LENGTH 20
+#endif
+
+#ifndef TOKENBUFFER_DEFINED
+#define TOKENBUFFER_DEFINED
+struct tokenbuffer
+{
+  long size;                    /* bytes allocated for `buffer' */
+  char *buffer;                 /* the token, followed by '\0' */
+};
+typedef struct tokenbuffer token_buffer;
+
+#endif /* not TOKENBUFFER_DEFINED */
+
+void init_tokenbuffer (token_buffer *tokenbuffer);
+
+long readtoken (FILE *stream, const char *delim, int n_delim,
+                token_buffer *tokenbuffer);
+int readtokens (FILE *stream, int projected_n_tokens,
+                const char *delim, int n_delim,
+                char ***tokens_out, long **token_lengths);
+
+#endif /* not H_READTOKENS_H */