From f940fece04ec0265ad8c52046f7678ad3116a305 Mon Sep 17 00:00:00 2001 From: Pádraig Brady
Date: Mon, 28 Apr 2014 13:29:41 +0100 Subject: ptx: fix whitespace trimming with multiple files This issue was identified by running the test suite with http://code.google.com/p/address-sanitizer/ which is included in GCC 4.8 and enabled with -fsanitize=address This was checked on Fedora 20 with GCC 4.8 as follows: $ yum install libasan # http://bugzilla.redhat.com/991003 $ rm -f src/ptx.o $ make check AM_CFLAGS='-fsanitize=address' SUBDIRS=. VERBOSE=yes $ failure identified in tests/test-suite.log To see this particular failure triggered with multiple files: $ src/ptx <(echo a) <(echo a) 2>&1 | asan_symbolize.py -d ================================================================= ==32178==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x60200000e74f at pc 0x435442 bp 0x7fffe8a1b290 sp 0x7fffe8a1b288 READ of size 1 at 0x60200000e74f thread T0 #0 0x435441 in define_all_fields coreutils/src/ptx.c:1425 #1 0x7fa206d31d64 in __libc_start_main ??:? #2 0x42f77c in _start ??:? 0x60200000e74f is located 1 bytes to the left of 3-byte region [0x60200000e750,0x60200000e753) allocated by thread T0 here: #0 0x421809 in realloc ??:? #1 0x439b4e in fread_file coreutils/lib/read-file.c:97 Shadow bytes around the buggy address: 0x0c047fff9c90: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff9ca0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff9cb0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff9cc0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff9cd0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fd fd =>0x0c047fff9ce0: fa fa 03 fa fa fa fd fd fa[fa]03 fa fa fa 00 00 0x0c047fff9cf0: fa fa 04 fa fa fa 04 fa fa fa fd fa fa fa fd fa 0x0c047fff9d00: fa fa 00 fa fa fa fd fa fa fa 00 fa fa fa 00 fa 0x0c047fff9d10: fa fa fd fa fa fa fd fa fa fa fd fa fa fa fd fa 0x0c047fff9d20: fa fa fd fa fa fa fd fa fa fa fd fa fa fa fd fa 0x0c047fff9d30: fa fa fd fa fa fa 00 fa fa fa 00 fa fa fa 00 fa Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Heap right redzone: fb Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack partial redzone: f4 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 ASan internal: fe ==32178==ABORTING The initial report and high level analysis were from Jim Meyering... "The underlying problem is that swallow_file_in_memory() is setting the contents of the global text_buffer for the first file, then updating it (clobbering old value) for the second file. Yet, some pointers to the initial buffer have been squirreled away and later, one of them (keyafter) is presumed to point into the new "text_buffer", which it does not. The subsequent SKIP_WHITE_BACKWARDS use backs up "cursor" and goes out of bounds." * src/ptx.c (text_buffers): Maintain references for the limits of each buffer corresponding to each file, rather than just the last processed. (struct OCCURS): Add a member to map back to the corresponding file. Note normally this could be computed from the "reference" member rather than needing the extra storage, however this is not possible when in --references mode. (find_occurs_in_text): Reference the array rather than a single entry. (define_all_fields): Likewise. Also avoid computing the file index since this is now stored directly. (main): Update text_buffers[] array rather than a single text_buffer. * tests/misc/ptx-overrun.sh: Even though this issue is already triggered with AddressSanitizer, add a new case to demonstrate the whitespace trimming issue, and to trigger without AddressSanitizer. Fixes https://bugs.gnu.org/16171 --- NEWS | 3 +++ src/ptx.c | 69 ++++++++++++++++++++++++++--------------------- tests/misc/ptx-overrun.sh | 13 +++++++-- 3 files changed, 52 insertions(+), 33 deletions(-) diff --git a/NEWS b/NEWS index 55f0b5881..7855a4811 100644 --- a/NEWS +++ b/NEWS @@ -43,6 +43,9 @@ GNU coreutils NEWS -*- outline -*- ptx --format long option parsing no longer falls through into the --help case. [bug introduced in TEXTUTILS-1_22i] + ptx now consistently trims whitespace when processing multiple files. + [This bug was present in "the beginning".] + shuf --repeat no longer dumps core if the input is empty. [bug introduced with the --repeat feature in coreutils-8.22] diff --git a/src/ptx.c b/src/ptx.c index be342eefc..d165e9665 100644 --- a/src/ptx.c +++ b/src/ptx.c @@ -166,7 +166,7 @@ static int total_line_count; /* total number of lines seen so far */ static const char **input_file_name; /* array of text input file names */ static int *file_line_count; /* array of 'total_line_count' values at end */ -static BLOCK text_buffer; /* file to study */ +static BLOCK *text_buffers; /* files to study */ /* SKIP_NON_WHITE used only for getting or skipping the reference. */ @@ -232,6 +232,7 @@ typedef struct DELTA left; /* distance to left context start */ DELTA right; /* distance to right context end */ int reference; /* reference descriptor */ + size_t file_index; /* corresponding file */ } OCCURS; @@ -744,7 +745,7 @@ digest_word_file (const char *file_name, WORD_TABLE *table) `----------------------------------------------------------------------*/ static void -find_occurs_in_text (void) +find_occurs_in_text (size_t file_index) { char *cursor; /* for scanning the source text */ char *scan; /* for scanning the source text also */ @@ -760,6 +761,8 @@ find_occurs_in_text (void) char *word_end; /* end of word */ char *next_context_start; /* next start of left context */ + const BLOCK *text_buffer = &text_buffers[file_index]; + /* reference_length is always used within 'if (input_reference)'. However, GNU C diagnoses that it may be used uninitialized. The following assignment is merely to shut it up. */ @@ -775,19 +778,19 @@ find_occurs_in_text (void) found inside it. Also, unconditionally assigning these variable has the happy effect of shutting up lint. */ - line_start = text_buffer.start; + line_start = text_buffer->start; line_scan = line_start; if (input_reference) { - SKIP_NON_WHITE (line_scan, text_buffer.end); + SKIP_NON_WHITE (line_scan, text_buffer->end); reference_length = line_scan - line_start; - SKIP_WHITE (line_scan, text_buffer.end); + SKIP_WHITE (line_scan, text_buffer->end); } /* Process the whole buffer, one line or one sentence at a time. */ - for (cursor = text_buffer.start; - cursor < text_buffer.end; + for (cursor = text_buffer->start; + cursor < text_buffer->end; cursor = next_context_start) { @@ -805,11 +808,11 @@ find_occurs_in_text (void) This test also accounts for the case of an incomplete line or sentence at the end of the buffer. */ - next_context_start = text_buffer.end; + next_context_start = text_buffer->end; if (context_regex.string) switch (re_search (&context_regex.pattern, cursor, - text_buffer.end - cursor, - 0, text_buffer.end - cursor, &context_regs)) + text_buffer->end - cursor, + 0, text_buffer->end - cursor, &context_regs)) { case -2: matcher_error (); @@ -915,7 +918,7 @@ find_occurs_in_text (void) total_line_count++; line_scan++; line_start = line_scan; - SKIP_NON_WHITE (line_scan, text_buffer.end); + SKIP_NON_WHITE (line_scan, text_buffer->end); reference_length = line_scan - line_start; } else @@ -956,7 +959,7 @@ find_occurs_in_text (void) occurs_cursor = occurs_table[0] + number_of_occurs[0]; - /* Define the refence field, if any. */ + /* Define the reference field, if any. */ if (auto_reference) { @@ -973,7 +976,7 @@ find_occurs_in_text (void) total_line_count++; line_scan++; line_start = line_scan; - SKIP_NON_WHITE (line_scan, text_buffer.end); + SKIP_NON_WHITE (line_scan, text_buffer->end); } else line_scan++; @@ -1007,6 +1010,7 @@ find_occurs_in_text (void) occurs_cursor->key = possible_key; occurs_cursor->left = context_start - possible_key.start; occurs_cursor->right = context_end - possible_key.start; + occurs_cursor->file_index = file_index; number_of_occurs[0]++; } @@ -1356,9 +1360,10 @@ define_all_fields (OCCURS *occurs) char *left_context_start; /* start of left context */ char *right_context_end; /* end of right context */ char *left_field_start; /* conservative start for 'head'/'before' */ - int file_index; /* index in text input file arrays */ const char *file_name; /* file name for reference */ int line_ordinal; /* line ordinal for reference */ + const char *buffer_start; /* start of buffered file for this occurs */ + const char *buffer_end; /* end of buffered file for this occurs */ /* Define 'keyafter', start of left context and end of right context. 'keyafter' starts at the saved position for keyword and extend to the @@ -1371,6 +1376,9 @@ define_all_fields (OCCURS *occurs) left_context_start = keyafter.start + occurs->left; right_context_end = keyafter.start + occurs->right; + buffer_start = text_buffers[occurs->file_index].start; + buffer_end = text_buffers[occurs->file_index].end; + cursor = keyafter.end; while (cursor < right_context_end && cursor <= keyafter.start + keyafter_max_width) @@ -1422,13 +1430,13 @@ define_all_fields (OCCURS *occurs) if (truncation_string) { cursor = before.start; - SKIP_WHITE_BACKWARDS (cursor, text_buffer.start); + SKIP_WHITE_BACKWARDS (cursor, buffer_start); before_truncation = cursor > left_context_start; } else before_truncation = 0; - SKIP_WHITE (before.start, text_buffer.end); + SKIP_WHITE (before.start, buffer_end); /* The tail could not take more columns than what has been left in the left context field, and a gap is mandatory. It starts after the @@ -1443,7 +1451,7 @@ define_all_fields (OCCURS *occurs) if (tail_max_width > 0) { tail.start = keyafter.end; - SKIP_WHITE (tail.start, text_buffer.end); + SKIP_WHITE (tail.start, buffer_end); tail.end = tail.start; cursor = tail.end; @@ -1489,7 +1497,7 @@ define_all_fields (OCCURS *occurs) if (head_max_width > 0) { head.end = before.start; - SKIP_WHITE_BACKWARDS (head.end, text_buffer.start); + SKIP_WHITE_BACKWARDS (head.end, buffer_start); head.start = left_field_start; while (head.start + head_max_width < head.end) @@ -1520,21 +1528,16 @@ define_all_fields (OCCURS *occurs) { /* Construct the reference text in preallocated space from the file - name and the line number. Find out in which file the reference - occurred. Standard input yields an empty file name. Insure line - numbers are one based, even if they are computed zero based. */ - - file_index = 0; - while (file_line_count[file_index] < occurs->reference) - file_index++; + name and the line number. Standard input yields an empty file name. + Ensure line numbers are 1 based, even if they are computed 0 based. */ - file_name = input_file_name[file_index]; + file_name = input_file_name[occurs->file_index]; if (!file_name) file_name = ""; line_ordinal = occurs->reference + 1; - if (file_index > 0) - line_ordinal -= file_line_count[file_index - 1]; + if (occurs->file_index > 0) + line_ordinal -= file_line_count[occurs->file_index - 1]; sprintf (reference.start, "%s:%d", file_name, line_ordinal); reference.end = reference.start + strlen (reference.start); @@ -2034,6 +2037,7 @@ main (int argc, char **argv) input_file_name = xmalloc (sizeof *input_file_name); file_line_count = xmalloc (sizeof *file_line_count); + text_buffers = xmalloc (sizeof *text_buffers); number_input_files = 1; input_file_name[0] = NULL; } @@ -2042,6 +2046,7 @@ main (int argc, char **argv) number_input_files = argc - optind; input_file_name = xmalloc (number_input_files * sizeof *input_file_name); file_line_count = xmalloc (number_input_files * sizeof *file_line_count); + text_buffers = xmalloc (number_input_files * sizeof *text_buffers); for (file_index = 0; file_index < number_input_files; file_index++) { @@ -2060,6 +2065,7 @@ main (int argc, char **argv) number_input_files = 1; input_file_name = xmalloc (sizeof *input_file_name); file_line_count = xmalloc (sizeof *file_line_count); + text_buffers = xmalloc (sizeof *text_buffers); if (!*argv[optind] || STREQ (argv[optind], "-")) input_file_name[0] = NULL; else @@ -2126,11 +2132,12 @@ main (int argc, char **argv) for (file_index = 0; file_index < number_input_files; file_index++) { + BLOCK *text_buffer = text_buffers + file_index; - /* Read the file in core, than study it. */ + /* Read the file in core, then study it. */ - swallow_file_in_memory (input_file_name[file_index], &text_buffer); - find_occurs_in_text (); + swallow_file_in_memory (input_file_name[file_index], text_buffer); + find_occurs_in_text (file_index); /* Maintain for each file how many lines has been read so far when its end is reached. Incrementing the count first is a simple kludge to diff --git a/tests/misc/ptx-overrun.sh b/tests/misc/ptx-overrun.sh index 693a1800a..be9fb524f 100755 --- a/tests/misc/ptx-overrun.sh +++ b/tests/misc/ptx-overrun.sh @@ -1,5 +1,4 @@ #!/bin/sh -# Trigger a heap-clobbering bug in ptx from coreutils-6.10 and earlier. # Copyright (C) 2008-2014 Free Software Foundation, Inc. @@ -19,12 +18,12 @@ . "${srcdir=.}/tests/init.sh"; path_prepend_ ./src print_ver_ ptx +# Trigger a heap-clobbering bug in ptx from coreutils-6.10 and earlier. # Using a long file name makes an abort more likely. # Even with no file name, valgrind detects the buffer overrun. f=01234567890123456789012345678901234567890123456789 touch $f empty || framework_failure_ - # Specifying a regular expression ending in a lone backslash # would cause ptx to write beyond the end of a malloc'd buffer. ptx -F '\' $f < /dev/null > out || fail=1 @@ -32,4 +31,14 @@ ptx -S 'foo\' $f < /dev/null >> out || fail=1 ptx -W 'bar\\\' $f < /dev/null >> out || fail=1 compare out empty || fail=1 + +# Trigger an invalid heap reference noticed by gcc -fsanitize=address +# from coreutils-8.22 and earlier. As well as an invalid memory reference, +# the issue can be seen in the output, with non deterministice whitespace +# trimming when multiple files are specified. +printf '%s\n' 'This is a ptx whitespace Trimming test' > ws.in +ptx ws.in ws.in | sort | uniq -u > out +compare /dev/null out || fail=1 + + Exit $fail -- cgit v1.2.3-70-g09d2