tests/misc/sort


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348

#!/bin/sh
# -*- perl -*-

# Copyright (C) 2008 Free Software Foundation, Inc.

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Default top_srcdir to ../.. unless the caller already set it
# (the `:' builtin is a no-op; only the assignment in the expansion matters).
: ${top_srcdir=../..}
# Source the shared helper script.
# NOTE(review): presumably it checks for a usable perl and sets $PERL,
# which is used below -- confirm in tests/require-perl.
. $top_srcdir/tests/require-perl

# Base name of this script: everything up to the last `/' is removed.
me=`echo $0|sed 's,.*/,,'`
# Replace this shell with perl: -w enables warnings, -I adds the tests
# directory to the module search path, and -M loads Coreutils and CuTmpdir
# (the latter receives $me as its import argument).  The lone `-' makes
# perl read its program from stdin, i.e. the here-document that follows.
# The delimiter is quoted (\EOF), so the shell expands nothing inside it.
exec $PERL -w -I$top_srcdir/tests -MCoreutils -M"CuTmpdir qw($me)" -- - <<\EOF
require 5.003;
use strict;

# Name of the utility under test; the expected diagnostics below start with it.
my $prog = 'sort';

# Force the C locale so that the executable's output is not localized.
foreach my $locale_var (qw(LANGUAGE LANG LC_ALL))
  {
    $ENV{$locale_var} = 'C';
  }

# Each test is run both with a named input file and with redirected stdin,
# so a diagnostic mentions either that file name or "-".  This substitution
# rewrites the name to '-' so that one expected string covers both runs.
my $normalize_filename = {};
$normalize_filename->{ERR_SUBST} = 's/^$prog: .*?:/$prog: -:/';

# Diagnostic expected when the input file named `no-file' cannot be opened.
my $no_file = "$prog: open failed: no-file: No such file or directory\n";

my @Tests =
(
["n1", '-n', {IN=>".01\n0\n"}, {OUT=>"0\n.01\n"}],
["n2", '-n', {IN=>".02\n.01\n"}, {OUT=>".01\n.02\n"}],
["n3", '-n', {IN=>".02\n.00\n"}, {OUT=>".00\n.02\n"}],
["n4", '-n', {IN=>".02\n.000\n"}, {OUT=>".000\n.02\n"}],
["n5", '-n', {IN=>".021\n.029\n"}, {OUT=>".021\n.029\n"}],

["n6", '-n', {IN=>".02\n.0*\n"}, {OUT=>".0*\n.02\n"}],
["n7", '-n', {IN=>".02\n.*\n"}, {OUT=>".*\n.02\n"}],
["n8a", '-s -n -k1,1', {IN=>".0a\n.0b\n"}, {OUT=>".0a\n.0b\n"}],
["n8b", '-s -n -k1,1', {IN=>".0b\n.0a\n"}, {OUT=>".0b\n.0a\n"}],
["n9a", '-s -n -k1,1', {IN=>".000a\n.000b\n"}, {OUT=>".000a\n.000b\n"}],
["n9b", '-s -n -k1,1', {IN=>".000b\n.000a\n"}, {OUT=>".000b\n.000a\n"}],
["n10a", '-s -n -k1,1', {IN=>".00a\n.000b\n"}, {OUT=>".00a\n.000b\n"}],
["n10b", '-s -n -k1,1', {IN=>".00b\n.000a\n"}, {OUT=>".00b\n.000a\n"}],
["n11a", '-s -n -k1,1', {IN=>".01a\n.010\n"}, {OUT=>".01a\n.010\n"}],
["n11b", '-s -n -k1,1', {IN=>".010\n.01a\n"}, {OUT=>".010\n.01a\n"}],

["01a", '', {IN=>"A\nB\nC\n"}, {OUT=>"A\nB\nC\n"}],
#
["02a", '-c', {IN=>"A\nB\nC\n"}, {OUT=>''}],
["02b", '-c', {IN=>"A\nC\nB\n"}, {OUT=>''}, {EXIT=>1},
 {ERR=>"$prog: -:3: disorder: B\n"}, $normalize_filename],
["02c", qw(-c -k1,1), {IN=>"a\na b\n"}, {OUT=>''}],
["02d", '-C', {IN=>"A\nB\nC\n"}, {OUT=>''}],
["02e", '-C', {IN=>"A\nC\nB\n"}, {OUT=>''}, {EXIT=>1}],
# This should fail because there are duplicate keys
["02m", '-cu', {IN=>"A\nA\n"}, {OUT=>''}, {EXIT=>1},
 {ERR=>"$prog: -:2: disorder: A\n"}, $normalize_filename],
["02n", '-cu', {IN=>"A\nB\n"}, {OUT=>''}],
["02o", '-cu', {IN=>"A\nB\nB\n"}, {OUT=>''}, {EXIT=>1},
 {ERR=>"$prog: -:3: disorder: B\n"}, $normalize_filename],
["02p", '-cu', {IN=>"B\nA\nB\n"}, {OUT=>''}, {EXIT=>1},
 {ERR=>"$prog: -:2: disorder: A\n"}, $normalize_filename],
#
["03a", '-k1', {IN=>"B\nA\n"}, {OUT=>"A\nB\n"}],
["03b", '-k1,1', {IN=>"B\nA\n"}, {OUT=>"A\nB\n"}],
["03c", qw(-k1 -k2), {IN=>"A b\nA a\n"}, {OUT=>"A a\nA b\n"}],
# Fail with a diagnostic when -k specifies field == 0.
["03d", '-k0', {EXIT=>2},
 {ERR=>"$prog: -: invalid field specification `0'\n"},
  $normalize_filename],
# Fail with a diagnostic when -k specifies character == 0.
["03e", '-k1.0', {EXIT=>2},
 {ERR=>"$prog: character offset is zero: invalid field specification `1.0'\n"}],
["03f", '-k1.1,-k0', {EXIT=>2},
 {ERR=>"$prog: invalid number after `,': invalid count at start of `-k0'\n"}],
# This is ok.
["03g", '-k1.1,1.0'],
# This is equivalent to 03g.
["03h", '-k1.1,1'],
# This, too, is equivalent to 03g.
["03i", '-k1,1'],
#
["04a", '-nc', {IN=>"2\n11\n"}],
["04b", '-n', {IN=>"11\n2\n"}, {OUT=>"2\n11\n"}],
["04c", '-k1n', {IN=>"11\n2\n"}, {OUT=>"2\n11\n"}],
["04d", '-k1', {IN=>"11\n2\n"}, {OUT=>"11\n2\n"}],
["04e", '-k2', {IN=>"ignored B\nz-ig A\n"}, {OUT=>"z-ig A\nignored B\n"}],
#
["05a", '-k1,2', {IN=>"A B\nA A\n"}, {OUT=>"A A\nA B\n"}],
["05b", '-k1,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
["05c", qw(-k1 -k2), {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
["05d", '-k2,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
["05e", '-k2,2', {IN=>"A B Z\nA A A\n"}, {OUT=>"A A A\nA B Z\n"}],
["05f", '-k2,2', {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
#
["06a", qw(-k 1,2), {IN=>"A B\nA A\n"}, {OUT=>"A A\nA B\n"}],
["06b", qw(-k 1,2), {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
["06c", qw(-k 1 -k 2), {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
["06d", qw(-k 2,2), {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
["06e", qw(-k 2,2), {IN=>"A B Z\nA A A\n"}, {OUT=>"A A A\nA B Z\n"}],
["06f", qw(-k 2,2), {IN=>"A B A\nA A Z\n"}, {OUT=>"A A Z\nA B A\n"}],
#
["07a", qw(-k 2,3), {IN=>"9 a b\n7 a a\n"}, {OUT=>"7 a a\n9 a b\n"}],
["07b", qw(-k 2,3), {IN=>"a a b\nz a a\n"}, {OUT=>"z a a\na a b\n"}],
["07c", qw(-k 2,3), {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
["07d", qw(+1 -3), {IN=>"y k b\nz k a\n"}, {OUT=>"z k a\ny k b\n"}],
#
# report an error for `.' without following char spec
["08a", qw(-k 2.,3), {EXIT=>2},
 {ERR=>"$prog: invalid number after `.': invalid count at start of `,3'\n"}],
# report an error for `,' without following POS2
["08b", qw(-k 2,), {EXIT=>2},
 {ERR=>"$prog: invalid number after `,': invalid count at start of `'\n"}],
#
# Test new -g option.
["09a", '-g', {IN=>"1e2\n2e1\n"}, {OUT=>"2e1\n1e2\n"}],
# Make sure -n works how we expect.
["09b", '-n', {IN=>"1e2\n2e1\n"}, {OUT=>"1e2\n2e1\n"}],
["09c", '-n', {IN=>"2e1\n1e2\n"}, {OUT=>"1e2\n2e1\n"}],
["09d", '-k2g', {IN=>"a 1e2\nb 2e1\n"}, {OUT=>"b 2e1\na 1e2\n"}],
#
# Bug reported by Roger Peel <R.Peel@ee.surrey.ac.uk>
["10a", qw(-t : -k 2.2,2.2), {IN=>":ba\n:ab\n"}, {OUT=>":ba\n:ab\n"}],
# Equivalent to above, but using obsolescent `+pos -pos' option syntax.
["10b", qw(-t : +1.1 -1.2), {IN=>":ba\n:ab\n"}, {OUT=>":ba\n:ab\n"}],
#
# The same as the preceding two, but with input lines reversed.
["10c", qw(-t : -k 2.2,2.2), {IN=>":ab\n:ba\n"}, {OUT=>":ba\n:ab\n"}],
# Equivalent to above, but using obsolescent `+pos -pos' option syntax.
["10d", qw(-t : +1.1 -1.2), {IN=>":ab\n:ba\n"}, {OUT=>":ba\n:ab\n"}],
# Try without -t...
# But note that we have to count the delimiting space at the beginning
# of each field that has it.
["10a0", qw(-k 2.3,2.3), {IN=>"z ba\nz ab\n"}, {OUT=>"z ba\nz ab\n"}],
["10a1", qw(-k 1.2,1.2), {IN=>"ba\nab\n"}, {OUT=>"ba\nab\n"}],
["10a2", qw(-b -k 2.2,2.2), {IN=>"z ba\nz ab\n"}, {OUT=>"z ba\nz ab\n"}],
#
# An even simpler example demonstrating the bug.
["10e", qw(-k 1.2,1.2), {IN=>"ab\nba\n"}, {OUT=>"ba\nab\n"}],
#
# The way sort works on these inputs (10f and 10g) seems wrong to me.
# See http://git.sv.gnu.org/gitweb/?p=coreutils.git;a=commitdiff;h=3c467c0d223
# POSIX doesn't seem to say one way or the other, but that's the way all
# other sort implementations work.
["10f", qw(-t : -k 1.3,1.3), {IN=>":ab\n:ba\n"}, {OUT=>":ba\n:ab\n"}],
["10g", qw(-k 1.4,1.4), {IN=>"a ab\nb ba\n"}, {OUT=>"b ba\na ab\n"}],
#
# Exercise bug re using -b to skip trailing blanks.
["11a", qw(-t: -k1,1b -k2,2), {IN=>"a\t:a\na :b\n"}, {OUT=>"a\t:a\na :b\n"}],
["11b", qw(-t: -k1,1b -k2,2), {IN=>"a :b\na\t:a\n"}, {OUT=>"a\t:a\na :b\n"}],
["11c", qw(-t: -k2,2b -k3,3), {IN=>"z:a\t:a\na :b\n"}, {OUT=>"z:a\t:a\na :b\n"}],
# Before 1.22m, the first key comparison reported equality.
# With 1.22m, they compare different: "a" sorts before "a\n",
# and the second key spec isn't even used.
["11d", qw(-t: -k2,2b -k3,3), {IN=>"z:a :b\na\t:a\n"}, {OUT=>"a\t:a\nz:a :b\n"}],
#
# Exercise bug re comparing `-' and integers.
["12a", qw(-n -t: +1), {IN=>"a:1\nb:-\n"}, {OUT=>"b:-\na:1\n"}],
["12b", qw(-n -t: +1), {IN=>"b:-\na:1\n"}, {OUT=>"b:-\na:1\n"}],
# Try some other (e.g. `X') invalid character.
["12c", qw(-n -t: +1), {IN=>"a:1\nb:X\n"}, {OUT=>"b:X\na:1\n"}],
["12d", qw(-n -t: +1), {IN=>"b:X\na:1\n"}, {OUT=>"b:X\na:1\n"}],
# From Karl Heuer
["13a", '+0.1n', {IN=>"axx\nb-1\n"}, {OUT=>"b-1\naxx\n"}],
["13b", '+0.1n', {IN=>"b-1\naxx\n"}, {OUT=>"b-1\naxx\n"}],
#
# From Carl Johnson <carlj@cjlinux.home.org>
["14a", qw(-d -u), {IN=>"mal\nmal-\nmala\n"}, {OUT=>"mal\nmala\n"}],
# Be sure to fix the (translate && ignore) case in keycompare.
["14b", qw(-f -d -u), {IN=>"mal\nmal-\nmala\n"}, {OUT=>"mal\nmala\n"}],
#
# Experiment with -i.
["15a", qw(-i -u), {IN=>"a\na\1\n"}, {OUT=>"a\n"}],
["15b", qw(-i -u), {IN=>"a\n\1a\n"}, {OUT=>"a\n"}],
["15c", qw(-i -u), {IN=>"a\1\na\n"}, {OUT=>"a\1\n"}],
["15d", qw(-i -u), {IN=>"\1a\na\n"}, {OUT=>"\1a\n"}],
["15e", qw(-i -u), {IN=>"a\n\1\1\1\1\1a\1\1\1\1\n"}, {OUT=>"a\n"}],

# From Erick Branderhorst -- fixed around 1.19e
["16a", '-f',
 {IN=>"éminence\nüberhaupt\n's-Gravenhage\naëroclub\nAag\naagtappels\n"},
 {OUT=>"'s-Gravenhage\nAag\naagtappels\naëroclub\néminence\nüberhaupt\n"}],

# This provokes a one-byte memory overrun of a malloc'd block for versions
# of sort from textutils-1.19p and before.
["17", '-c', {IN=>"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n"}],

# POSIX says -n no longer implies -b, so here we're comparing ` 9' and `10'.
["18a", '-k1.1,1.2n', {IN=>" 901\n100\n"}, {OUT=>" 901\n100\n"}],

# Just like above, because the global `-b' has no effect on the
# key specifier when a key-specific option (`n' in this case) is used.
["18b", qw(-b -k1.1,1.2n), {IN=>" 901\n100\n"}, {OUT=>" 901\n100\n"}],

# Here we're comparing ` 90' and `10', because the `b' on the key-end specifier
# makes sort ignore leading blanks when determining that key's *end*.
["18c", '-k1.1,1.2nb', {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}],

# Here we're comparing `9' and `10', because the `b' on the key-start specifier
# makes sort ignore leading blanks when determining that key's *start*.
["18d", '-k1.1b,1.2n', {IN=>" 901\n100\n"}, {OUT=>" 901\n100\n"}],

# This compares `90' and `10', as it ignores leading blanks for both
# key start and key end.
["18e", qw(-nb -k1.1,1.2), {IN=>" 901\n100\n"}, {OUT=>"100\n 901\n"}],

# This looks odd, but works properly -- 2nd keyspec is never
# used because all lines are different.
["19a", qw(+0 +1nr), {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 1\nb 2\nb 3\n"}],

# The test *intended* by the author of the above, but using the
# more-intuitive POSIX-style -k options.
["19b", qw(-k1,1 -k2nr), {IN=>"b 2\nb 1\nb 3\n"}, {OUT=>"b 3\nb 2\nb 1\n"}],

# This test failed when sort-1.22 was compiled on a Next x86 system
# without optimization.  Without optimization gcc uses the buggy version
# of memcmp in the Next C library.  With optimization, gcc uses its
# (working) builtin version.  Test case from William Lewis.
["20a", '',
 {IN=>"_________U__free\n_________U__malloc\n_________U__abort\n_________U__memcpy\n_________U__memset\n_________U_dyld_stub_binding_helper\n_________U__malloc\n_________U___iob\n_________U__abort\n_________U__fprintf\n"},
 {OUT=>"_________U___iob\n_________U__abort\n_________U__abort\n_________U__fprintf\n_________U__free\n_________U__malloc\n_________U__malloc\n_________U__memcpy\n_________U__memset\n_________U_dyld_stub_binding_helper\n"}],

# Demonstrate that folding changes the ordering of e.g. A, a, and _
# because while they normally (in the C locale) collate like A, _, a,
# when using -f, `a' is compared as if it were `A'.
["21a", '', {IN=>"A\na\n_\n"}, {OUT=>"A\n_\na\n"}],
["21b", '-f', {IN=>"A\na\n_\n"}, {OUT=>"A\na\n_\n"}],
["21c", '-f', {IN=>"a\nA\n_\n"}, {OUT=>"A\na\n_\n"}],
["21d", '-f', {IN=>"_\na\nA\n"}, {OUT=>"A\na\n_\n"}],
["21e", '-f', {IN=>"a\n_\nA\n"}, {OUT=>"A\na\n_\n"}],
["21f", '-fs', {IN=>"A\na\n_\n"}, {OUT=>"A\na\n_\n"}],
["21g", '-fu', {IN=>"a\n_\n"}, {OUT=>"a\n_\n"}],

# This test failed until 1.22f.  From Zvi Har'El.
["22a", qw(-k 2,2fd -k 1,1r), {IN=>"3 b\n4 B\n"}, {OUT=>"4 B\n3 b\n"}],
["22b", qw(-k 2,2d  -k 1,1r), {IN=>"3 b\n4 b\n"}, {OUT=>"4 b\n3 b\n"}],

["no-file1", 'no-file', {EXIT=>2}, {ERR=>$no_file}],
# This test failed until 1.22f.  Sort didn't give an error.
# From Will Edgington.
["o-no-file1", qw(-o no-file no-file), {EXIT=>2}, {ERR=>$no_file}],

["create-empty", qw(-o no/such/file /dev/null), {EXIT=>2},
 {ERR=>"$prog: open failed: no/such/file: No such file or directory\n"}],

# From Paul Eggert.  This was fixed in textutils-1.22k.
["neg-nls", '-n', {IN=>"-1\n-9\n"}, {OUT=>"-9\n-1\n"}],

# From Paul Eggert.  This was fixed in textutils-1.22m.
# The bug was visible only when using the internationalized sorting code
# (i.e., not when configured with --disable-nls).
["nul-nls", '', {IN=>"\0b\n\0a\n"}, {OUT=>"\0a\n\0b\n"}],

# Paul Eggert wrote:
# I tested the revised `sort' against Solaris `sort', and found a
# discrepancy that turns out to be a longstanding bug in GNU sort.
# POSIX.2 specifies that a newline is part of the input line, and should
# be significant during comparison; but with GNU sort the newline is
# insignificant.  Here is an example of the bug:
#
#        $ od -c t
#        0000000  \n  \t  \n
#        0000003
#        $ sort t | od -c
#        0000000  \n  \t  \n
#        0000003
#
# The correct output of the latter command should be
#
#        0000000  \t  \n  \n
#        0000003
#
# because \t comes before \n in the collating sequence, and the trailing
# \n's are part of the input line.
["use-nl", '', {IN=>"\n\t\n"}, {OUT=>"\n\t\n"}],

# Specifying two -o options should evoke a failure
["o2", qw(-o x -o y), {EXIT=>2},
 {ERR=>"foo\n"}, {ERR_SUBST => 's/^$prog: .*/foo/'}],

# Specifying incompatible options should evoke a failure.
["incompat1", '-in', {EXIT=>2},
 {ERR=>"$prog: options `-in' are incompatible\n"}],
["incompat2", '-fR', {EXIT=>2},
 {ERR=>"$prog: options `-fR' are incompatible\n"}],
["incompat3", '-dfgiMnR', {EXIT=>2},
 {ERR=>"$prog: options `-dfgMnR' are incompatible\n"}],
["incompat4", qw(-c -o /dev/null), {EXIT=>2},
 {ERR=>"$prog: options `-co' are incompatible\n"}],
["incompat5", qw(-C -o /dev/null), {EXIT=>2},
 {ERR=>"$prog: options `-Co' are incompatible\n"}],
["incompat6", '-cC', {EXIT=>2},
 {ERR=>"$prog: options `-cC' are incompatible\n"}],
["incompat7", qw(--sort=random -n), {EXIT=>2},
 {ERR=>"$prog: options `-nR' are incompatible\n"}],

# -t '\0' is accepted, as of coreutils-5.0.91
['nul-tab', "-k2,2 -t '\\0'",
 {IN=>"a\0z\01\nb\0y\02\n"}, {OUT=>"b\0y\02\na\0z\01\n"}],

["bigfield", qw(-k 340282366920938463463374607431768211456),
 {IN=>"2\n1\n"}, {OUT=>"1\n2\n"}],

# Using an old-style key-specifying option like +1 with an invalid
# ordering-option character would cause sort to try to free an invalid
# (non-malloc'd) pointer.  This bug affects coreutils-6.5 through 6.9.
['obs-inval', '+1x', {EXIT=>2},
 {ERR=>"foo\n"}, {ERR_SUBST => 's/^$prog: .*/foo/'}],

# Exercise the code that enlarges the line buffer.  See the thread here:
# http://thread.gmane.org/gmane.comp.gnu.coreutils.bugs/11006
['realloc-buf', '-S1', {IN=>'a'x4000 ."\n"}, {OUT=>'a'x4000 ."\n"}],

["sort-numeric", '--sort=numeric', {IN=>".01\n0\n"}, {OUT=>"0\n.01\n"}],
["sort-gennum", '--sort=general-numeric',
  {IN=>"1e2\n2e1\n"}, {OUT=>"2e1\n1e2\n"}],
);

# Tests that use an old-style option like +1 get _POSIX2_VERSION=199209
# added to their environment.  A test qualifies when any of its plain-string
# elements (i.e. not a hash reference) contains `+' followed by a digit;
# at most one ENV entry is appended per test.
foreach my $t (@Tests)
  {
    my $old_style_count = grep { !ref $_ && $_ =~ /\+\d/ } @$t;
    if ($old_style_count)
      {
        push @$t, {ENV=>'_POSIX2_VERSION=199209'};
      }
  }

# Replace the table with whatever triple_test returns for it.
# NOTE(review): triple_test is not defined in this file; presumably it comes
# from a module loaded with -M on the perl command line and expands each
# test into its file-name and redirected-stdin variants -- confirm there.
@Tests = triple_test (\@Tests);

# DEBUG and VERBOSE are taken from the environment and handed to run_tests
# as its save-temps and verbose arguments.
my ($save_temps, $verbose) = @ENV{qw(DEBUG VERBOSE)};

# Whatever run_tests returns becomes this script's exit status.
my $status = run_tests ($prog, $prog, \@Tests, $save_temps, $verbose);
exit $status;
EOF