diff options
-rw-r--r-- | NEWS | 6 | ||||
-rw-r--r-- | doc/coreutils.texi | 13 | ||||
-rw-r--r-- | src/md5sum.c | 220 | ||||
-rwxr-xr-x | tests/misc/md5sum-bsd | 49 |
4 files changed, 208 insertions, 80 deletions
@@ -2,6 +2,12 @@ GNU coreutils NEWS -*- outline -*- * Noteworthy changes in release ?.? (????-??-??) [?] +** New features + + md5sum now accepts the --tag option to print BSD-style output with GNU + file name escaping. This also affects sha1sum, sha224sum, sha256sum, + sha384sum and sha512sum. + ** Bug fixes du no longer emits a "disk-corrupted"-style diagnostic when it detects diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 62b31fe1a..e015fc57f 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -3705,6 +3705,17 @@ If all listed files are readable and are consistent with the associated MD5 checksums, exit successfully. Otherwise exit with a status code indicating there was a failure. +@item --tag +@opindex --tag +@cindex BSD output +Output BSD-style checksums, which indicate the checksum algorithm used. +As a @acronym{GNU} extension, file names with problematic characters +are escaped as described above, using the same escaping indicator of @samp{\} +at the start of the line. +The @option{--tag} option implies binary mode, and is disallowed with +@option{--text} mode as supporting that would unnecessarily complicate +the output format, while providing little benefit. + @item -t @itemx --text @opindex -t @@ -3715,7 +3726,7 @@ outputting a @samp{ } flag. This is the inverse of @option{--binary}. This option is the default on systems like @acronym{GNU} that do not distinguish between binary and text files. On other systems, it is the default for reading standard input when standard input is a -terminal. +terminal. This mode is never the default when @option{--tag} is used. 
@item -w @itemx --warn diff --git a/src/md5sum.c b/src/md5sum.c index f7e084914..1663c1e3d 100644 --- a/src/md5sum.c +++ b/src/md5sum.c @@ -135,7 +135,8 @@ enum { STATUS_OPTION = CHAR_MAX + 1, QUIET_OPTION, - STRICT_OPTION + STRICT_OPTION, + TAG_OPTION }; static struct option const long_options[] = @@ -147,6 +148,7 @@ static struct option const long_options[] = { "text", no_argument, NULL, 't' }, { "warn", no_argument, NULL, 'w' }, { "strict", no_argument, NULL, STRICT_OPTION }, + { "tag", no_argument, NULL, TAG_OPTION }, { GETOPT_HELP_OPTION_DECL }, { GETOPT_VERSION_OPTION_DECL }, { NULL, 0, NULL, 0 } @@ -179,6 +181,9 @@ With no FILE, or when FILE is -, read standard input.\n\ printf (_("\ -c, --check read %s sums from the FILEs and check them\n"), DIGEST_TYPE_STRING); + fputs (_("\ + --tag create a BSD-style checksum\n\ +"), stdout); if (O_BINARY) fputs (_("\ -t, --text read in text mode (default if reading tty stdin)\n\ @@ -215,23 +220,73 @@ space for text), and name for each FILE.\n"), #define ISWHITE(c) ((c) == ' ' || (c) == '\t') +/* Given a file name, S of length S_LEN, that is not NUL-terminated, + modify it in place, performing the equivalent of this sed substitution: + 's/\\n/\n/g;s/\\\\/\\/g' i.e., replacing each "\\n" string with a newline + and each "\\\\" with a single backslash, NUL-terminate it and return S. + If S is not a valid escaped file name, i.e., if it ends with an odd number + of backslashes or if it contains a backslash followed by anything other + than "n" or another backslash, return NULL. */ + +static char * +filename_unescape (char *s, size_t s_len) +{ + char *dst = s; + + for (size_t i = 0; i < s_len; i++) + { + switch (s[i]) + { + case '\\': + if (i == s_len - 1) + { + /* File name ends with an unescaped backslash: invalid. */ + return NULL; + } + ++i; + switch (s[i]) + { + case 'n': + *dst++ = '\n'; + break; + case '\\': + *dst++ = '\\'; + break; + default: + /* Only '\' or 'n' may follow a backslash. 
*/ + return NULL; + } + break; + + case '\0': + /* The file name may not contain a NUL. */ + return NULL; + + default: + *dst++ = s[i]; + break; + } + } + if (dst < s + s_len) + *dst = '\0'; + + return s; +} + /* Split the checksum string S (of length S_LEN) from a BSD 'md5' or 'sha1' command into two parts: a hexadecimal digest, and the file name. S is modified. Return true if successful. */ static bool bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest, - char **file_name) + char **file_name, bool escaped_filename) { size_t i; if (s_len == 0) return false; - *file_name = s; - - /* Find end of filename. The BSD 'md5' and 'sha1' commands do not escape - filenames, so search backwards for the last ')'. */ + /* Find end of filename. */ i = s_len - 1; while (i && s[i] != ')') i--; @@ -239,6 +294,11 @@ bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest, if (s[i] != ')') return false; + *file_name = s; + + if (escaped_filename && filename_unescape (s, i) == NULL) + return false; + s[i++] = '\0'; while (ISWHITE (s[i])) @@ -271,7 +331,14 @@ split_3 (char *s, size_t s_len, while (ISWHITE (s[i])) ++i; + if (s[i] == '\\') + { + ++i; + escaped_filename = true; + } + /* Check for BSD-style checksum line. 
*/ + algo_name_len = strlen (DIGEST_TYPE_STRING); if (STREQ_LEN (s + i, DIGEST_TYPE_STRING, algo_name_len)) { @@ -282,7 +349,7 @@ split_3 (char *s, size_t s_len, *binary = 0; return bsd_split_3 (s + i + algo_name_len + 1, s_len - (i + algo_name_len + 1), - hex_digest, file_name); + hex_digest, file_name, escaped_filename); } } @@ -293,11 +360,6 @@ split_3 (char *s, size_t s_len, if (s_len - i < min_digest_line_length + (s[i] == '\\')) return false; - if (s[i] == '\\') - { - ++i; - escaped_filename = true; - } *hex_digest = (unsigned char *) &s[i]; /* The first field has to be the n-character hexadecimal @@ -333,49 +395,8 @@ split_3 (char *s, size_t s_len, *file_name = &s[i]; if (escaped_filename) - { - /* Translate each '\n' string in the file name to a NEWLINE, - and each '\\' string to a backslash. */ - - char *dst = &s[i]; - - while (i < s_len) - { - switch (s[i]) - { - case '\\': - if (i == s_len - 1) - { - /* A valid line does not end with a backslash. */ - return false; - } - ++i; - switch (s[i++]) - { - case 'n': - *dst++ = '\n'; - break; - case '\\': - *dst++ = '\\'; - break; - default: - /* Only '\' or 'n' may follow a backslash. */ - return false; - } - break; - - case '\0': - /* The file name may not contain a NUL. */ - return false; - break; + return filename_unescape (&s[i], s_len - i) != NULL; - default: - *dst++ = s[i++]; - break; - } - } - *dst = '\0'; - } return true; } @@ -636,6 +657,31 @@ digest_check (const char *checkfile_name) && (!strict || n_improperly_formatted_lines == 0)); } +static void +print_filename (char const *file) +{ + /* Translate each NEWLINE byte to the string, "\\n", + and each backslash to "\\\\". 
*/ + while (*file) + { + switch (*file) + { + case '\n': + fputs ("\\n", stdout); + break; + + case '\\': + fputs ("\\\\", stdout); + break; + + default: + putchar (*file); + break; + } + file++; + } +} + int main (int argc, char **argv) { @@ -646,6 +692,7 @@ main (int argc, char **argv) int opt; bool ok = true; int binary = -1; + bool prefix_tag = false; /* Setting values of global variables. */ initialize_main (&argc, &argv); @@ -690,6 +737,10 @@ main (int argc, char **argv) case STRICT_OPTION: strict = true; break; + case TAG_OPTION: + prefix_tag = true; + binary = 1; + break; case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); default: @@ -699,6 +750,24 @@ main (int argc, char **argv) min_digest_line_length = MIN_DIGEST_LINE_LENGTH; digest_hex_bytes = DIGEST_HEX_BYTES; + if (prefix_tag && !binary) + { + /* This could be supported in a backwards compatible way + by prefixing the output line with a space in text mode. + However that's invasive enough that it was agreed to + not support this mode with --tag, as --text use cases + are adequately supported by the default output format. */ + error (0, 0, _("--tag does not support --text mode")); + usage (EXIT_FAILURE); + } + + if (prefix_tag && do_check) + { + error (0, 0, _("the --tag option is meaningless when " + "verifying checksums")); + usage (EXIT_FAILURE); + } + if (0 <= binary && do_check) { error (0, 0, _("the --binary and --text options are meaningless when " @@ -754,41 +823,36 @@ main (int argc, char **argv) ok = false; else { + if (prefix_tag) + { + if (strchr (file, '\n') || strchr (file, '\\')) + putchar ('\\'); + + fputs (DIGEST_TYPE_STRING, stdout); + fputs (" (", stdout); + print_filename (file); + fputs (") = ", stdout); + } + size_t i; /* Output a leading backslash if the file name contains a newline or backslash. 
*/ - if (strchr (file, '\n') || strchr (file, '\\')) + if (!prefix_tag && (strchr (file, '\n') || strchr (file, '\\'))) putchar ('\\'); for (i = 0; i < (digest_hex_bytes / 2); ++i) printf ("%02x", bin_buffer[i]); - putchar (' '); - if (file_is_binary) - putchar ('*'); - else - putchar (' '); - - /* Translate each NEWLINE byte to the string, "\\n", - and each backslash to "\\\\". */ - for (i = 0; i < strlen (file); ++i) + if (!prefix_tag) { - switch (file[i]) - { - case '\n': - fputs ("\\n", stdout); - break; - - case '\\': - fputs ("\\\\", stdout); - break; - - default: - putchar (file[i]); - break; - } + putchar (' '); + + putchar (file_is_binary ? '*' : ' '); + + print_filename (file); } + putchar ('\n'); } } diff --git a/tests/misc/md5sum-bsd b/tests/misc/md5sum-bsd index 8226d7ab7..ce4117679 100755 --- a/tests/misc/md5sum-bsd +++ b/tests/misc/md5sum-bsd @@ -1,5 +1,6 @@ #!/bin/sh -# make sure 'md5sum -c' works for alternate BSD format (md5 -r) +# 'md5sum' tests for generation and checking of +# BSD traditional and alternate formats (md5 [-r]) # Copyright (C) 2011-2012 Free Software Foundation, Inc. @@ -19,6 +20,9 @@ . "${srcdir=.}/init.sh"; path_prepend_ ../src print_ver_ md5sum +## BSD alternate format tests ## + +# Ensure we can --check BSD alternate format. # Note we start this list with a name # that's unambiguous in BSD format. # I.E. one not starting with ' ' or '*' @@ -38,4 +42,47 @@ md5sum --strict -c check.md5 || fail=1 # an option to avoid the ambiguity. tail -n+2 check.md5 | md5sum --strict -c && fail=1 + +## BSD traditional format tests (--tag option) ## + +# Ensure --tag and --check are mutually exclusive +md5sum --tag --check /dev/null && fail=1 + +# Ensure --tag and --text are mutually exclusive +# We don't support --text with BSD tradition format, +# as that would complicate the output format, +# while providing little benefit over --text processing +# available with the default md5sum output format. 
+md5sum --tag --text /dev/null && fail=1 + +# Ensure we can --check BSD traditional format we produce +rm check.md5 +for i in 'a' ' b' '*c' 'dd' ' '; do + echo "$i" > "$i" + md5sum --tag "$i" >> check.md5 +done +md5sum --strict -c check.md5 || fail=1 + +# Ensure we can --check BSD traditional format we produce +# with the GNU extension of escaped newlines +nl=' +' +tab=' ' +rm check.md5 +for i in 'a\b' 'a\' "a${nl}b" "a${tab}b"; do + :> "$i" + md5sum --tag "$i" >> check.md5 +done +md5sum --strict -c check.md5 || fail=1 + +# Ensure BSD traditional format with GNU extension escapes +# is in the expected format +ex_file='test +\\file' +ex_output='\MD5 (test\n\\\\file) = d41d8cd98f00b204e9800998ecf8427e' +touch "$ex_file" +printf "%s\n" "$ex_output" > exp +md5sum --tag "$ex_file" > out +compare exp out || fail=1 + Exit $fail |