summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author James Youngman <jay@gnu.org> 2007-05-26 07:08:18 +0200
committer Jim Meyering <jim@meyering.net> 2007-05-26 07:13:50 +0200
commit e82c7ddd4737d3e402cb6c73d91b50afe8c5b7f7 (patch)
tree 8499ecbe6d89853aeeee0465f99c4c3ce3c81133
parent 8ab7f351a139ab25a14843c69699069242bfc510 (diff)
download coreutils-e82c7ddd4737d3e402cb6c73d91b50afe8c5b7f7.tar.xz
wc: ignore multibyte-character decoding errors
* src/wc.c (wc): Don't issue an error message when mbrtowc indicates that we have seen an invalid byte sequence. This makes "wc /bin/sh" bearable (though the word and line counts are likely not to be useful).
* NEWS: Mention the change.
-rw-r--r-- ChangeLog 9
-rw-r--r-- NEWS 5
-rw-r--r-- src/wc.c 19
3 files changed, 19 insertions, 14 deletions
diff --git a/ChangeLog b/ChangeLog
index 34304f124..ab446387c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2007-05-25 James Youngman <jay@gnu.org>
+
+ wc: ignore multibyte-character decoding errors
+ * src/wc.c (wc): Don't issue an error message when mbrtowc
+ indicates that we have seen an invalid byte sequence. This
+ makes "wc /bin/sh" bearable (though the word and line counts
+ are likely not to be useful).
+ * NEWS: Mention the change.
+
2007-05-22 Jim Meyering <jim@meyering.net>
Check for an up-to-date copyright year in coreutils.texi.
diff --git a/NEWS b/NEWS
index ea08e0a92..715b0d698 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,11 @@ GNU coreutils NEWS -*- outline -*-
option of the same name, this makes uniq consume and produce
NUL-terminated lines rather than newline-terminated lines.
+ wc no longer warns about character decoding errors in multibyte locales.
+ This means for example that "wc /bin/sh" now produces normal output
+ (though the word count will have no real meaning) rather than many
+ error messages.
+
** Bug fixes
cut now diagnoses a range starting with zero (e.g., -f 0-2) as invalid;
diff --git a/src/wc.c b/src/wc.c
index 85f7d33a9..b4464d2c4 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -1,5 +1,5 @@
/* wc - print the number of lines, words, and bytes in files
- Copyright (C) 85, 91, 1995-2006 Free Software Foundation, Inc.
+ Copyright (C) 85, 91, 1995-2007 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -274,8 +274,6 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
bool in_word = false;
uintmax_t linepos = 0;
mbstate_t state = { 0, };
- uintmax_t last_error_line = 0;
- int last_error_errno = 0;
# if SUPPORT_OLD_MBRTOWC
/* Back-up the state before each multibyte character conversion and
move the last incomplete character of the buffer to the front
@@ -323,17 +321,10 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
}
if (n == (size_t) -1)
{
- /* Signal repeated errors only once per line. */
- if (!(lines + 1 == last_error_line
- && errno == last_error_errno))
- {
- char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)];
- last_error_line = lines + 1;
- last_error_errno = errno;
- error (0, errno, "%s:%s", file,
- umaxtostr (last_error_line, line_number_buf));
- ok = false;
- }
+ /* Remember that we read a byte, but don't complain
+ about the error. Because of the decoding error,
+ this is considered to be a byte but not a
+ character (that is, chars is not incremented). */
p++;
bytes_read--;
}