md5sum, sha*sum: add --tag to output a format indicating the algorithm

The format used is the BSD traditional format, which looks like:

  MD5 (/dev/null) = d41d8cd98f00b204e9800998ecf8427e

* NEWS: Add new feature info.
* doc/coreutils.texi (md5sum invocation): Add detailed information
about the new --tag option.
* src/md5sum.c: Add the new --tag option for BSD-style output.
(bsd_split_3): Add ESCAPED_FILENAME parameter.
(print_filename): New function refactored from main().
(filename_unescape): New function refactored from split_3().
* tests/misc/md5sum-bsd: Add tests for the new feature.
author: Ondrej Oprala <ooprala@redhat.com> 2012-08-02 13:31:50 +0200
committer: Pádraig Brady <P@draigBrady.com> 2012-08-24 15:56:26 +0100
commit: c9f4c323220f51a42e3da8ea79f9ddcedab041b9 (patch)
tree: a414c5e208e62404ecedea21479a7a625d3d6cfa
parent: dd22da8e9539cc88193987b6997769ae4ede2b15 (diff)
download: coreutils-c9f4c323220f51a42e3da8ea79f9ddcedab041b9.tar.xz
4 files changed, 208 insertions, 80 deletions
diff --git a/NEWS b/NEWS
index e6d79bf92..798a51295 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,12 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 * Noteworthy changes in release ?.? (????-??-??) [?]
 
+** New features
+
+  md5sum now accepts the --tag option to print BSD-style output with GNU
+  file name escaping.  This also affects sha1sum, sha224sum, sha256sum,
+  sha384sum and sha512sum.
+
 ** Bug fixes
 
   du no longer emits a "disk-corrupted"-style diagnostic when it detects
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 62b31fe1a..e015fc57f 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3705,6 +3705,17 @@ If all listed files are readable and are consistent with the associated
 MD5 checksums, exit successfully.  Otherwise exit with a status code
 indicating there was a failure.
 
+@item --tag
+@opindex --tag
+@cindex BSD output
+Output BSD style checksums, which indicate the checksum algorithm used.
+As a @acronym{GNU} extension, file names with problematic characters
+are escaped as described above, with the same escaping indicator of @samp{\}
+at the start of the line.
+The @option{--tag} option implies binary mode, and is disallowed with
+@option{--text} mode as supporting that would unnecessarily complicate
+the output format, while providing little benefit.
+
 @item -t
 @itemx --text
 @opindex -t
@@ -3715,7 +3726,7 @@ outputting a @samp{ } flag.  This is the inverse of @option{--binary}.
 This option is the default on systems like @acronym{GNU} that do not
 distinguish between binary and text files.  On other systems, it is
 the default for reading standard input when standard input is a
-terminal.
+terminal.  This mode is never defaulted to if @option{--tag} is used.
 
 @item -w
 @itemx --warn
diff --git a/src/md5sum.c b/src/md5sum.c
index f7e084914..1663c1e3d 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -135,7 +135,8 @@ enum
 {
   STATUS_OPTION = CHAR_MAX + 1,
   QUIET_OPTION,
-  STRICT_OPTION
+  STRICT_OPTION,
+  TAG_OPTION
 };
 
 static struct option const long_options[] =
@@ -147,6 +148,7 @@ static struct option const long_options[] =
   { "text", no_argument, NULL, 't' },
   { "warn", no_argument, NULL, 'w' },
   { "strict", no_argument, NULL, STRICT_OPTION },
+  { "tag", no_argument, NULL, TAG_OPTION },
   { GETOPT_HELP_OPTION_DECL },
   { GETOPT_VERSION_OPTION_DECL },
   { NULL, 0, NULL, 0 }
@@ -179,6 +181,9 @@ With no FILE, or when FILE is -, read standard input.\n\
       printf (_("\
   -c, --check          read %s sums from the FILEs and check them\n"),
               DIGEST_TYPE_STRING);
+      fputs (_("\
+      --tag            create a BSD-style checksum\n\
+"), stdout);
       if (O_BINARY)
         fputs (_("\
   -t, --text           read in text mode (default if reading tty stdin)\n\
@@ -215,23 +220,73 @@ space for text), and name for each FILE.\n"),
 
 #define ISWHITE(c) ((c) == ' ' || (c) == '\t')
 
+/* Given a file name, S of length S_LEN, that is not NUL-terminated,
+   modify it in place, performing the equivalent of this sed substitution:
+   's/\\n/\n/g;s/\\\\/\\/g' i.e., replacing each "\\n" string with a newline
+   and each "\\\\" with a single backslash, and return S.  NUL-terminate it
+   only if it became shorter.  If S is not a valid escaped file name, i.e.,
+   if it ends with an odd number of backslashes or contains a backslash
+   followed by anything other than "n" or another backslash, return NULL.  */
+
+static char *
+filename_unescape (char *s, size_t s_len)
+{
+  char *dst = s;
+
+  for (size_t i = 0; i < s_len; i++)
+    {
+      switch (s[i])
+        {
+        case '\\':
+          if (i == s_len - 1)
+            {
+              /* File name ends with an unescaped backslash: invalid.  */
+              return NULL;
+            }
+          ++i;
+          switch (s[i])
+            {
+            case 'n':
+              *dst++ = '\n';
+              break;
+            case '\\':
+              *dst++ = '\\';
+              break;
+            default:
+              /* Only '\' or 'n' may follow a backslash.  */
+              return NULL;
+            }
+          break;
+
+        case '\0':
+          /* The file name may not contain a NUL.  */
+          return NULL;
+
+        default:
+          *dst++ = s[i];
+          break;
+        }
+    }
+  if (dst < s + s_len)
+    *dst = '\0';
+
+  return s;
+}
+
 /* Split the checksum string S (of length S_LEN) from a BSD 'md5' or
    'sha1' command into two parts: a hexadecimal digest, and the file
    name.  S is modified.  Return true if successful.  */
 
 static bool
 bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest,
-             char **file_name)
+             char **file_name, bool escaped_filename)
 {
   size_t i;
 
   if (s_len == 0)
     return false;
 
-  *file_name = s;
-
-  /* Find end of filename. The BSD 'md5' and 'sha1' commands do not escape
-     filenames, so search backwards for the last ')'. */
+  /* Find end of filename.  */
   i = s_len - 1;
   while (i && s[i] != ')')
     i--;
@@ -239,6 +294,11 @@ bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest,
   if (s[i] != ')')
     return false;
 
+  *file_name = s;
+
+  if (escaped_filename && filename_unescape (s, i) == NULL)
+    return false;
+
   s[i++] = '\0';
 
   while (ISWHITE (s[i]))
@@ -271,7 +331,14 @@ split_3 (char *s, size_t s_len,
   while (ISWHITE (s[i]))
     ++i;
 
+  if (s[i] == '\\')
+    {
+      ++i;
+      escaped_filename = true;
+    }
+
   /* Check for BSD-style checksum line. */
+
   algo_name_len = strlen (DIGEST_TYPE_STRING);
   if (STREQ_LEN (s + i, DIGEST_TYPE_STRING, algo_name_len))
     {
@@ -282,7 +349,7 @@ split_3 (char *s, size_t s_len,
           *binary = 0;
           return bsd_split_3 (s +      i + algo_name_len + 1,
                               s_len - (i + algo_name_len + 1),
-                              hex_digest, file_name);
+                              hex_digest, file_name, escaped_filename);
         }
     }
 
@@ -293,11 +360,6 @@ split_3 (char *s, size_t s_len,
   if (s_len - i < min_digest_line_length + (s[i] == '\\'))
     return false;
 
-  if (s[i] == '\\')
-    {
-      ++i;
-      escaped_filename = true;
-    }
   *hex_digest = (unsigned char *) &s[i];
 
   /* The first field has to be the n-character hexadecimal
@@ -333,49 +395,8 @@ split_3 (char *s, size_t s_len,
   *file_name = &s[i];
 
   if (escaped_filename)
-    {
-      /* Translate each '\n' string in the file name to a NEWLINE,
-         and each '\\' string to a backslash.  */
-
-      char *dst = &s[i];
-
-      while (i < s_len)
-        {
-          switch (s[i])
-            {
-            case '\\':
-              if (i == s_len - 1)
-                {
-                  /* A valid line does not end with a backslash.  */
-                  return false;
-                }
-              ++i;
-              switch (s[i++])
-                {
-                case 'n':
-                  *dst++ = '\n';
-                  break;
-                case '\\':
-                  *dst++ = '\\';
-                  break;
-                default:
-                  /* Only '\' or 'n' may follow a backslash.  */
-                  return false;
-                }
-              break;
-
-            case '\0':
-              /* The file name may not contain a NUL.  */
-              return false;
-              break;
+    return filename_unescape (&s[i], s_len - i) != NULL;
 
-            default:
-              *dst++ = s[i++];
-              break;
-            }
-        }
-      *dst = '\0';
-    }
   return true;
 }
 
@@ -636,6 +657,31 @@ digest_check (const char *checkfile_name)
           && (!strict || n_improperly_formatted_lines == 0));
 }
 
+static void
+print_filename (char const *file)
+{
+  /* Translate each NEWLINE byte to the string, "\\n",
+     and each backslash to "\\\\".  */
+  while (*file)
+    {
+      switch (*file)
+        {
+        case '\n':
+          fputs ("\\n", stdout);
+          break;
+
+        case '\\':
+          fputs ("\\\\", stdout);
+          break;
+
+        default:
+          putchar (*file);
+          break;
+        }
+      file++;
+    }
+}
+
 int
 main (int argc, char **argv)
 {
@@ -646,6 +692,7 @@ main (int argc, char **argv)
   int opt;
   bool ok = true;
   int binary = -1;
+  bool prefix_tag = false;
 
   /* Setting values of global variables.  */
   initialize_main (&argc, &argv);
@@ -690,6 +737,10 @@ main (int argc, char **argv)
       case STRICT_OPTION:
         strict = true;
         break;
+      case TAG_OPTION:
+        prefix_tag = true;
+        binary = 1;
+        break;
       case_GETOPT_HELP_CHAR;
       case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
       default:
@@ -699,6 +750,24 @@ main (int argc, char **argv)
   min_digest_line_length = MIN_DIGEST_LINE_LENGTH;
   digest_hex_bytes = DIGEST_HEX_BYTES;
 
+  if (prefix_tag && !binary)
+   {
+     /* This could be supported in a backwards compatible way
+        by prefixing the output line with a space in text mode.
+        However that's invasive enough that it was agreed to
+        not support this mode with --tag, as --text use cases
+        are adequately supported by the default output format.  */
+     error (0, 0, _("--tag does not support --text mode"));
+     usage (EXIT_FAILURE);
+   }
+
+  if (prefix_tag && do_check)
+    {
+      error (0, 0, _("the --tag option is meaningless when "
+                     "verifying checksums"));
+      usage (EXIT_FAILURE);
+    }
+
   if (0 <= binary && do_check)
     {
       error (0, 0, _("the --binary and --text options are meaningless when "
@@ -754,41 +823,36 @@ main (int argc, char **argv)
             ok = false;
           else
             {
+              if (prefix_tag)
+                {
+                  if (strchr (file, '\n') || strchr (file, '\\'))
+                    putchar ('\\');
+
+                  fputs (DIGEST_TYPE_STRING, stdout);
+                  fputs (" (", stdout);
+                  print_filename (file);
+                  fputs (") = ", stdout);
+                }
+
               size_t i;
 
               /* Output a leading backslash if the file name contains
                  a newline or backslash.  */
-              if (strchr (file, '\n') || strchr (file, '\\'))
+              if (!prefix_tag && (strchr (file, '\n') || strchr (file, '\\')))
                 putchar ('\\');
 
               for (i = 0; i < (digest_hex_bytes / 2); ++i)
                 printf ("%02x", bin_buffer[i]);
 
-              putchar (' ');
-              if (file_is_binary)
-                putchar ('*');
-              else
-                putchar (' ');
-
-              /* Translate each NEWLINE byte to the string, "\\n",
-                 and each backslash to "\\\\".  */
-              for (i = 0; i < strlen (file); ++i)
+              if (!prefix_tag)
                 {
-                  switch (file[i])
-                    {
-                    case '\n':
-                      fputs ("\\n", stdout);
-                      break;
-
-                    case '\\':
-                      fputs ("\\\\", stdout);
-                      break;
-
-                    default:
-                      putchar (file[i]);
-                      break;
-                    }
+                  putchar (' ');
+
+                  putchar (file_is_binary ? '*' : ' ');
+
+                  print_filename (file);
                 }
+
               putchar ('\n');
             }
         }
diff --git a/tests/misc/md5sum-bsd b/tests/misc/md5sum-bsd
index 8226d7ab7..ce4117679 100755
--- a/tests/misc/md5sum-bsd
+++ b/tests/misc/md5sum-bsd
@@ -1,5 +1,6 @@
 #!/bin/sh
-# make sure 'md5sum -c' works for alternate BSD format (md5 -r)
+# 'md5sum' tests for generation and checking of
+# BSD traditional and alternate formats (md5 [-r])
 
 # Copyright (C) 2011-2012 Free Software Foundation, Inc.
 
@@ -19,6 +20,9 @@
 . "${srcdir=.}/init.sh"; path_prepend_ ../src
 print_ver_ md5sum
 
+## BSD alternate format tests ##
+
+# Ensure we can --check BSD alternate format.
 # Note we start this list with a name
 # that's unambiguous in BSD format.
 # I.E. one not starting with ' ' or '*'
@@ -38,4 +42,47 @@ md5sum --strict -c check.md5 || fail=1
 # an option to avoid the ambiguity.
 tail -n+2 check.md5 | md5sum --strict -c && fail=1
 
+
+## BSD traditional format tests (--tag option) ##
+
+# Ensure --tag and --check are mutually exclusive
+md5sum --tag --check /dev/null && fail=1
+
+# Ensure --tag and --text are mutually exclusive
+# We don't support --text with BSD traditional format,
+# as that would complicate the output format,
+# while providing little benefit over --text processing
+# available with the default md5sum output format.
+md5sum --tag --text /dev/null && fail=1
+
+# Ensure we can --check BSD traditional format we produce
+rm check.md5
+for i in 'a' ' b' '*c' 'dd' ' '; do
+  echo "$i" > "$i"
+  md5sum --tag "$i" >> check.md5
+done
+md5sum --strict -c check.md5 || fail=1
+
+# Ensure we can --check BSD traditional format we produce
+# with the GNU extension of escaped newlines
+nl='
+'
+tab='	'
+rm check.md5
+for i in 'a\b' 'a\' "a${nl}b" "a${tab}b"; do
+  :> "$i"
+  md5sum --tag "$i" >> check.md5
+done
+md5sum --strict -c check.md5 || fail=1
+
+# Ensure BSD traditional format with GNU extension escapes
+# is in the expected format
+ex_file='test
+\\file'
+ex_output='\MD5 (test\n\\\\file) = d41d8cd98f00b204e9800998ecf8427e'
+touch "$ex_file"
+printf "%s\n" "$ex_output" > exp
+md5sum --tag "$ex_file" > out
+compare exp out || fail=1
+
 Exit $fail
author	Ondrej Oprala <ooprala@redhat.com>	2012-08-02 13:31:50 +0200
committer	Pádraig Brady <P@draigBrady.com>	2012-08-24 15:56:26 +0100
commit	c9f4c323220f51a42e3da8ea79f9ddcedab041b9 (patch)
tree	a414c5e208e62404ecedea21479a7a625d3d6cfa
parent	dd22da8e9539cc88193987b6997769ae4ede2b15 (diff)
download	coreutils-c9f4c323220f51a42e3da8ea79f9ddcedab041b9.tar.xz