diff options
author | Jim Meyering <jim@meyering.net> | 2002-02-11 14:28:09 +0000 |
---|---|---|
committer | Jim Meyering <jim@meyering.net> | 2002-02-11 14:28:09 +0000 |
commit | 395fbc0389f599ea16c4d9474a2166f57904013d (patch) | |
tree | 744f7c94a9445e9bb1aee23417c1156a237729ca | |
parent | 19d1db4624f484cb7747517018dc30455ca3365b (diff) | |
download | coreutils-395fbc0389f599ea16c4d9474a2166f57904013d.tar.xz |
2002-02-02 Paul Eggert <eggert@twinsun.com>
Bruno Haible <bruno@clisp.org>
* unicodeio.c (unicode_to_mb): New function, extracted from
print_unicode_char. Call failure callback instead of error.
(fwrite_success_callback): New function.
(exit_failure_callback): New function.
(fallback_failure_callback): New function.
(print_unicode_char): Call unicode_to_mb.
-rw-r--r-- | lib/unicodeio.c | 123 |
1 files changed, 93 insertions, 30 deletions
diff --git a/lib/unicodeio.c b/lib/unicodeio.c index 9f9036643..049b9afd0 100644 --- a/lib/unicodeio.c +++ b/lib/unicodeio.c @@ -1,6 +1,6 @@ /* Unicode character output to streams with locale dependent encoding. - Copyright (C) 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 2000-2002 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published @@ -19,6 +19,9 @@ /* Written by Bruno Haible <haible@clisp.cons.org>. */ +/* Note: This file requires the locale_charset() function. See in + libiconv-1.7/libcharset/INTEGRATE for how to obtain it. */ + #ifdef HAVE_CONFIG_H # include <config.h> #endif @@ -47,11 +50,13 @@ extern int errno; #if ENABLE_NLS # include <libintl.h> -# define _(Text) gettext (Text) #else -# define _(Text) Text +# define gettext(Text) Text #endif +#define _(Text) gettext (Text) +#define N_(Text) Text +/* Specification. */ #include "unicodeio.h" /* When we pass a Unicode character to iconv(), we must pass it in a @@ -105,10 +110,19 @@ utf8_wctomb (unsigned char *r, unsigned int wc) /* Luckily, the encoding's name is platform independent. */ #define UTF8_NAME "UTF-8" -/* Outputs the Unicode character CODE to the output stream STREAM. +/* Converts the Unicode character CODE to its multibyte representation + in the current locale and calls the SUCCESS callback on the resulting + byte sequence. If an error occurs, invokes the FAILURE callback instead, + passing it CODE and an English error string. + Returns whatever the callback returned. Assumes that the locale doesn't change between two calls. */ -void -print_unicode_char (FILE *stream, unsigned int code) +long +unicode_to_mb (unsigned int code, + long (*success) PARAMS ((const char *buf, size_t buflen, + void *callback_arg)), + long (*failure) PARAMS ((unsigned int code, const char *msg, + void *callback_arg)), + void *callback_arg) { static int initialized; static int is_utf8; @@ -130,31 +144,32 @@ print_unicode_char (FILE *stream, unsigned int code) { utf8_to_local = iconv_open (charset, UTF8_NAME); if (utf8_to_local == (iconv_t)(-1)) - { - /* For an unknown encoding, assume ASCII. */ - utf8_to_local = iconv_open ("ASCII", UTF8_NAME); - if (utf8_to_local == (iconv_t)(-1)) - error (1, 0, - _("cannot output U+%04X: iconv function not usable"), - code); - } + /* For an unknown encoding, assume ASCII. */ + utf8_to_local = iconv_open ("ASCII", UTF8_NAME); } #endif initialized = 1; } + /* Test whether the utf8_to_local converter is available at all. */ + if (!is_utf8) + { +#if HAVE_ICONV + if (utf8_to_local == (iconv_t)(-1)) + return failure (code, N_("iconv function not usable"), callback_arg); +#else + return failure (code, N_("iconv function not available"), callback_arg); +#endif + } + /* Convert the character to UTF-8. */ count = utf8_wctomb ((unsigned char *) inbuf, code); if (count < 0) - error (1, 0, _("U+%04X: character out of range"), code); + return failure (code, N_("character out of range"), callback_arg); - if (is_utf8) - { - fwrite (inbuf, 1, count, stream); - } - else - { #if HAVE_ICONV + if (!is_utf8) + { char outbuf[25]; const char *inptr; size_t inbytesleft; @@ -177,8 +192,7 @@ print_unicode_char (FILE *stream, unsigned int code) || (res > 0 && code != 0 && outptr - outbuf == 1 && *outbuf == '\0') # endif ) - error (1, res == (size_t)(-1) ? errno : 0, - _("cannot convert U+%04X to local character set"), code); + return failure (code, NULL, callback_arg); /* Avoid glibc-2.1 bug and Solaris 2.7 bug. */ # if defined _LIBICONV_VERSION \ @@ -187,14 +201,63 @@ print_unicode_char (FILE *stream, unsigned int code) /* Get back to the initial shift state. */ res = iconv (utf8_to_local, NULL, NULL, &outptr, &outbytesleft); if (res == (size_t)(-1)) - error (1, errno, _("cannot convert U+%04X to local character set"), - code); + return failure (code, NULL, callback_arg); # endif - fwrite (outbuf, 1, outptr - outbuf, stream); -#else - error (1, 0, _("cannot output U+%04X: iconv function not available"), - code); -#endif + return success (outbuf, outptr - outbuf, callback_arg); } +#endif + + /* At this point, is_utf8 is true, so no conversion is needed. */ + return success (inbuf, count, callback_arg); +} + +/* Simple success callback that outputs the converted string. + The STREAM is passed as callback_arg. */ +long +fwrite_success_callback (const char *buf, size_t buflen, void *callback_arg) +{ + FILE *stream = (FILE *) callback_arg; + + fwrite (buf, 1, buflen, stream); + return 0; +} + +/* Simple failure callback that displays an error and exits. */ +static long +exit_failure_callback (unsigned int code, const char *msg, void *callback_arg) +{ + if (msg == NULL) + error (1, 0, _("cannot convert U+%04X to local character set"), code); + else + error (1, 0, _("cannot convert U+%04X to local character set: %s"), code, + gettext (msg)); + return -1; +} + +/* Simple failure callback that displays a fallback representation in plain + ASCII, using the same notation as ISO C99 strings. */ +static long +fallback_failure_callback (unsigned int code, const char *msg, void *callback_arg) +{ + FILE *stream = (FILE *) callback_arg; + + if (code < 0x10000) + fprintf (stream, "\\u%04X", code); + else + fprintf (stream, "\\U%08X", code); + return -1; +} + +/* Outputs the Unicode character CODE to the output stream STREAM. + Upon failure, exit if exit_on_error is true, otherwise output a fallback + notation. */ +void +print_unicode_char (FILE *stream, unsigned int code, int exit_on_error) +{ + unicode_to_mb (code, fwrite_success_callback, + exit_on_error + ? exit_failure_callback + : fallback_failure_callback, + stream); } |