wc: ignore multibyte-character decoding errors

* src/wc.c (wc): Don't issue an error message when mbrtowc indicates that we have seen an invalid byte sequence. This makes "wc /bin/sh" bearable (though the word and line counts are likely not to be useful).
* NEWS: Mention the change.
author: James Youngman <jay@gnu.org> 2007-05-26 07:08:18 +0200
committer: Jim Meyering <jim@meyering.net> 2007-05-26 07:13:50 +0200
commit: e82c7ddd4737d3e402cb6c73d91b50afe8c5b7f7 (patch)
tree: 8499ecbe6d89853aeeee0465f99c4c3ce3c81133
parent: 8ab7f351a139ab25a14843c69699069242bfc510 (diff)
download: coreutils-e82c7ddd4737d3e402cb6c73d91b50afe8c5b7f7.tar.xz
3 files changed, 19 insertions, 14 deletions
diff --git a/ChangeLog b/ChangeLog
index 34304f124..ab446387c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2007-05-25  James Youngman  <jay@gnu.org>
+
+	wc: ignore multibyte-character decoding errors
+	* src/wc.c (wc): Don't issue an error message when mbrtowc
+	indicates that we have seen an invalid byte sequence.  This
+	makes "wc /bin/sh" bearable (though the word and line counts
+	are likely not to be useful).
+	* NEWS: Mention the change.
+
 2007-05-22  Jim Meyering  <jim@meyering.net>
 
 	Check for an up-to-date copyright year in coreutils.texi.
diff --git a/NEWS b/NEWS
index ea08e0a92..715b0d698 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,11 @@ GNU coreutils NEWS                                    -*- outline -*-
   option of the same name, this makes uniq consume and produce
   NUL-terminated lines rather than newline-terminated lines.
 
+  wc no longer warns about character decoding errors in multibyte locales.
+  This means for example that "wc /bin/sh" now produces normal output
+  (though the word count will have no real meaning) rather than many
+  error messages.
+
 ** Bug fixes
 
   cut now diagnoses a range starting with zero (e.g., -f 0-2) as invalid;
diff --git a/src/wc.c b/src/wc.c
index 85f7d33a9..b4464d2c4 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -1,5 +1,5 @@
 /* wc - print the number of lines, words, and bytes in files
-   Copyright (C) 85, 91, 1995-2006 Free Software Foundation, Inc.
+   Copyright (C) 85, 91, 1995-2007 Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -274,8 +274,6 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
       bool in_word = false;
       uintmax_t linepos = 0;
       mbstate_t state = { 0, };
-      uintmax_t last_error_line = 0;
-      int last_error_errno = 0;
 # if SUPPORT_OLD_MBRTOWC
       /* Back-up the state before each multibyte character conversion and
 	 move the last incomplete character of the buffer to the front
@@ -323,17 +321,10 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)
 		}
 	      if (n == (size_t) -1)
 		{
-		  /* Signal repeated errors only once per line.  */
-		  if (!(lines + 1 == last_error_line
-			&& errno == last_error_errno))
-		    {
-		      char line_number_buf[INT_BUFSIZE_BOUND (uintmax_t)];
-		      last_error_line = lines + 1;
-		      last_error_errno = errno;
-		      error (0, errno, "%s:%s", file,
-			     umaxtostr (last_error_line, line_number_buf));
-		      ok = false;
-		    }
+		  /* Remember that we read a byte, but don't complain
+		     about the error.  Because of the decoding error,
+		     this is considered to be a byte but not a
+		     character (that is, chars is not incremented).  */
 		  p++;
 		  bytes_read--;
 		}
author	James Youngman <jay@gnu.org>	2007-05-26 07:08:18 +0200
committer	Jim Meyering <jim@meyering.net>	2007-05-26 07:13:50 +0200
commit	e82c7ddd4737d3e402cb6c73d91b50afe8c5b7f7 (patch)
tree	8499ecbe6d89853aeeee0465f99c4c3ce3c81133
parent	8ab7f351a139ab25a14843c69699069242bfc510 (diff)
download	coreutils-e82c7ddd4737d3e402cb6c73d91b50afe8c5b7f7.tar.xz