summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Eggert <eggert@cs.ucla.edu>2014-05-06 18:38:09 -0700
committerPaul Eggert <eggert@cs.ucla.edu>2014-05-06 18:38:46 -0700
commit3974c0932df8281f4e0fa34c36c07a9d72f1155d (patch)
tree11ac3df3d1ed48724732d1cf747ae61d05236c3c
parent8840a00cd79b8beae72b1a0ec6b9e64912633c13 (diff)
downloadcoreutils-3974c0932df8281f4e0fa34c36c07a9d72f1155d.tar.xz
dd: fix conv=ascii, conv=ebcdic, conv=ibm to match POSIX
Problem reported by Don Baggett in <http://bugs.gnu.org/17422>. * NEWS: * doc/coreutils.texi (dd invocation): Document this. * src/dd.c (conversions): conv=ascii implies conv=unblock. conv=ebcdic and conv=ibm imply conv=block. (ascii_to_ebcdic, ebcdic_to_ascii): Correct to match POSIX 1003.1-2013. * tests/dd/ascii.sh: New file. * tests/local.mk (all_tests): Add it.
-rw-r--r--NEWS21
-rw-r--r--doc/coreutils.texi11
-rw-r--r--src/dd.c37
-rwxr-xr-xtests/dd/ascii.sh71
-rw-r--r--tests/local.mk1
5 files changed, 122 insertions, 19 deletions
diff --git a/NEWS b/NEWS
index f7b511273..4efd60d2e 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,27 @@ GNU coreutils NEWS -*- outline -*-
date could crash or go into an infinite loop when parsing a malformed TZ="".
[bug introduced with the --date='TZ="" ..' parsing feature in coreutils-5.3.0]
+ dd's ASCII and EBCDIC conversions were incompatible with common practice and
+ with POSIX, and have been corrected as follows. First, conv=ascii now
+ implies conv=unblock, and conv=ebcdic and conv=ibm now imply conv=block.
+ Second, the translation tables for dd conv=ascii and conv=ebcdic have been
+ corrected as shown in the following table, where A is the ASCII value, W is
+ the old, wrong EBCDIC value, and E is the new, corrected EBCDIC value; all
+ values are in octal.
+
+ A W E
+ 041 117 132
+ 133 112 255
+ 135 132 275
+ 136 137 232
+ 174 152 117
+ 176 241 137
+ 313 232 152
+ 325 255 112
+ 345 275 241
+
+ [These dd bugs were present in "the beginning".]
+
head --bytes=-N and --lines=-N now handles devices more
consistently, not ignoring data from virtual devices like /dev/zero,
or on BSD systems data from tty devices.
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index a949ffcac..789cd68e5 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -8621,21 +8621,26 @@ Conversions:
Convert EBCDIC to ASCII,
using the conversion table specified by POSIX@.
This provides a 1:1 translation for all 256 bytes.
+This option implies @samp{conv=unblock}; input is converted to
+ASCII before trailing spaces are deleted.
@item ebcdic
@opindex ebcdic@r{, converting to}
Convert ASCII to EBCDIC@.
This is the inverse of the @samp{ascii} conversion.
+This option implies @samp{conv=block}; trailing spaces are added
+before the data is converted to EBCDIC@.
@item ibm
@opindex alternate ebcdic@r{, converting to}
-Convert ASCII to alternate EBCDIC,
-using the alternate conversion table specified by POSIX@.
+This acts like @samp{conv=ebcdic}, except it
+uses the alternate conversion table specified by POSIX@.
This is not a 1:1 translation, but reflects common historical practice
for @samp{~}, @samp{[}, and @samp{]}.
The @samp{ascii}, @samp{ebcdic}, and @samp{ibm} conversions are
-mutually exclusive.
+mutually exclusive. If you use any of these options, you should also
+use the @samp{cbs=} option.
@item block
@opindex block @r{(space-padding)}
diff --git a/src/dd.c b/src/dd.c
index c7909e725..1e387f3d8 100644
--- a/src/dd.c
+++ b/src/dd.c
@@ -274,9 +274,9 @@ struct symbol_value
/* Conversion symbols, for conv="...". */
static struct symbol_value const conversions[] =
{
- {"ascii", C_ASCII | C_TWOBUFS}, /* EBCDIC to ASCII. */
- {"ebcdic", C_EBCDIC | C_TWOBUFS}, /* ASCII to EBCDIC. */
- {"ibm", C_IBM | C_TWOBUFS}, /* Slightly different ASCII to EBCDIC. */
+ {"ascii", C_ASCII | C_UNBLOCK | C_TWOBUFS}, /* EBCDIC to ASCII. */
+ {"ebcdic", C_EBCDIC | C_BLOCK | C_TWOBUFS}, /* ASCII to EBCDIC. */
+ {"ibm", C_IBM | C_BLOCK | C_TWOBUFS}, /* Different ASCII to EBCDIC. */
{"block", C_BLOCK | C_TWOBUFS}, /* Variable to fixed length records. */
{"unblock", C_UNBLOCK | C_TWOBUFS}, /* Fixed to variable length records. */
{"lcase", C_LCASE | C_TWOBUFS}, /* Translate upper to lower case. */
@@ -381,24 +381,29 @@ static struct symbol_value const statuses[] =
/* Translation table formed by applying successive transformations. */
static unsigned char trans_table[256];
+/* Standard translation tables, taken from POSIX 1003.1-2013.
+ Beware of imitations; there are lots of ASCII<->EBCDIC tables
+ floating around the net, perhaps valid for some applications but
+ not correct here. */
+
static char const ascii_to_ebcdic[] =
{
'\000', '\001', '\002', '\003', '\067', '\055', '\056', '\057',
'\026', '\005', '\045', '\013', '\014', '\015', '\016', '\017',
'\020', '\021', '\022', '\023', '\074', '\075', '\062', '\046',
'\030', '\031', '\077', '\047', '\034', '\035', '\036', '\037',
- '\100', '\117', '\177', '\173', '\133', '\154', '\120', '\175',
+ '\100', '\132', '\177', '\173', '\133', '\154', '\120', '\175',
'\115', '\135', '\134', '\116', '\153', '\140', '\113', '\141',
'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
'\370', '\371', '\172', '\136', '\114', '\176', '\156', '\157',
'\174', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
'\310', '\311', '\321', '\322', '\323', '\324', '\325', '\326',
'\327', '\330', '\331', '\342', '\343', '\344', '\345', '\346',
- '\347', '\350', '\351', '\112', '\340', '\132', '\137', '\155',
+ '\347', '\350', '\351', '\255', '\340', '\275', '\232', '\155',
'\171', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
'\210', '\211', '\221', '\222', '\223', '\224', '\225', '\226',
'\227', '\230', '\231', '\242', '\243', '\244', '\245', '\246',
- '\247', '\250', '\251', '\300', '\152', '\320', '\241', '\007',
+ '\247', '\250', '\251', '\300', '\117', '\320', '\137', '\007',
'\040', '\041', '\042', '\043', '\044', '\025', '\006', '\027',
'\050', '\051', '\052', '\053', '\054', '\011', '\012', '\033',
'\060', '\061', '\032', '\063', '\064', '\065', '\066', '\010',
@@ -408,10 +413,10 @@ static char const ascii_to_ebcdic[] =
'\130', '\131', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\160', '\161', '\162', '\163', '\164', '\165',
'\166', '\167', '\170', '\200', '\212', '\213', '\214', '\215',
- '\216', '\217', '\220', '\232', '\233', '\234', '\235', '\236',
- '\237', '\240', '\252', '\253', '\254', '\255', '\256', '\257',
+ '\216', '\217', '\220', '\152', '\233', '\234', '\235', '\236',
+ '\237', '\240', '\252', '\253', '\254', '\112', '\256', '\257',
'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
- '\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+ '\270', '\271', '\272', '\273', '\274', '\241', '\276', '\277',
'\312', '\313', '\314', '\315', '\316', '\317', '\332', '\333',
'\334', '\335', '\336', '\337', '\352', '\353', '\354', '\355',
'\356', '\357', '\372', '\373', '\374', '\375', '\376', '\377'
@@ -464,21 +469,21 @@ static char const ebcdic_to_ascii[] =
'\220', '\221', '\026', '\223', '\224', '\225', '\226', '\004',
'\230', '\231', '\232', '\233', '\024', '\025', '\236', '\032',
'\040', '\240', '\241', '\242', '\243', '\244', '\245', '\246',
- '\247', '\250', '\133', '\056', '\074', '\050', '\053', '\041',
+ '\247', '\250', '\325', '\056', '\074', '\050', '\053', '\174',
'\046', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
- '\260', '\261', '\135', '\044', '\052', '\051', '\073', '\136',
+ '\260', '\261', '\041', '\044', '\052', '\051', '\073', '\176',
'\055', '\057', '\262', '\263', '\264', '\265', '\266', '\267',
- '\270', '\271', '\174', '\054', '\045', '\137', '\076', '\077',
+ '\270', '\271', '\313', '\054', '\045', '\137', '\076', '\077',
'\272', '\273', '\274', '\275', '\276', '\277', '\300', '\301',
'\302', '\140', '\072', '\043', '\100', '\047', '\075', '\042',
'\303', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
'\150', '\151', '\304', '\305', '\306', '\307', '\310', '\311',
'\312', '\152', '\153', '\154', '\155', '\156', '\157', '\160',
- '\161', '\162', '\313', '\314', '\315', '\316', '\317', '\320',
- '\321', '\176', '\163', '\164', '\165', '\166', '\167', '\170',
- '\171', '\172', '\322', '\323', '\324', '\325', '\326', '\327',
+ '\161', '\162', '\136', '\314', '\315', '\316', '\317', '\320',
+ '\321', '\345', '\163', '\164', '\165', '\166', '\167', '\170',
+ '\171', '\172', '\322', '\323', '\324', '\133', '\326', '\327',
'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
- '\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+ '\340', '\341', '\342', '\343', '\344', '\135', '\346', '\347',
'\173', '\101', '\102', '\103', '\104', '\105', '\106', '\107',
'\110', '\111', '\350', '\351', '\352', '\353', '\354', '\355',
'\175', '\112', '\113', '\114', '\115', '\116', '\117', '\120',
diff --git a/tests/dd/ascii.sh b/tests/dd/ascii.sh
new file mode 100755
index 000000000..9ef158f78
--- /dev/null
+++ b/tests/dd/ascii.sh
@@ -0,0 +1,71 @@
+#!/bin/sh
+# test conv=ascii
+
+# Copyright (C) 2014 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ dd
+
+(
+ # Two lines, EBCDIC " A A" and " A ", followed by all the bytes in order.
+ printf '\100\301\100\301\100\301\100\100' &&
+ printf $(for i in $(seq 0 255); do printf '\\%03o' $i; done; echo '')
+) >in || framework_failure_
+
+(
+ # The converted lines, with trailing spaces removed.
+ printf ' A A\n A\n' &&
+ printf '\000\001\002\003\n\234\011\206\177\n' &&
+ printf '\227\215\216\013\n\014\015\016\017\n' &&
+ printf '\020\021\022\023\n\235\205\010\207\n' &&
+ printf '\030\031\222\217\n\034\035\036\037\n' &&
+ printf '\200\201\202\203\n\204\012\027\033\n' &&
+ printf '\210\211\212\213\n\214\005\006\007\n' &&
+ printf '\220\221\026\223\n\224\225\226\004\n' &&
+ printf '\230\231\232\233\n\024\025\236\032\n' &&
+ printf '\040\240\241\242\n\243\244\245\246\n' &&
+ printf '\247\250\325\056\n\074\050\053\174\n' &&
+ printf '\046\251\252\253\n\254\255\256\257\n' &&
+ printf '\260\261\041\044\n\052\051\073\176\n' &&
+ printf '\055\057\262\263\n\264\265\266\267\n' &&
+ printf '\270\271\313\054\n\045\137\076\077\n' &&
+ printf '\272\273\274\275\n\276\277\300\301\n' &&
+ printf '\302\140\072\043\n\100\047\075\042\n' &&
+ printf '\303\141\142\143\n\144\145\146\147\n' &&
+ printf '\150\151\304\305\n\306\307\310\311\n' &&
+ printf '\312\152\153\154\n\155\156\157\160\n' &&
+ printf '\161\162\136\314\n\315\316\317\320\n' &&
+ printf '\321\345\163\164\n\165\166\167\170\n' &&
+ printf '\171\172\322\323\n\324\133\326\327\n' &&
+ printf '\330\331\332\333\n\334\335\336\337\n' &&
+ printf '\340\341\342\343\n\344\135\346\347\n' &&
+ printf '\173\101\102\103\n\104\105\106\107\n' &&
+ printf '\110\111\350\351\n\352\353\354\355\n' &&
+ printf '\175\112\113\114\n\115\116\117\120\n' &&
+ printf '\121\122\356\357\n\360\361\362\363\n' &&
+ printf '\134\237\123\124\n\125\126\127\130\n' &&
+ printf '\131\132\364\365\n\366\367\370\371\n' &&
+ printf '\060\061\062\063\n\064\065\066\067\n' &&
+ printf '\070\071\372\373\n\374\375\376\377\n'
+) >exp || framework_failure_
+
+dd if=in of=out conv=ascii cbs=4
+cp ./in ./out ./exp /tmp
+
+fail=0
+compare exp out || fail=1
+
+Exit $fail
diff --git a/tests/local.mk b/tests/local.mk
index 6d4414488..5286bfb34 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -470,6 +470,7 @@ all_tests = \
tests/df/no-mtab-status.sh \
tests/df/skip-duplicates.sh \
tests/df/skip-rootfs.sh \
+ tests/dd/ascii.sh \
tests/dd/direct.sh \
tests/dd/misc.sh \
tests/dd/no-allocate.sh \