summaryrefslogtreecommitdiff
path: root/src/ptx.c
diff options
context:
space:
mode:
authorPádraig Brady <P@draigBrady.com>2014-04-28 13:29:41 +0100
committerPádraig Brady <P@draigBrady.com>2014-04-29 09:50:58 +0100
commitf940fece04ec0265ad8c52046f7678ad3116a305 (patch)
treee51e068a1000a9503b7b9a7616b5f8a639d5d750 /src/ptx.c
parent2f8d53a7989bf172931bfe5cfc7f7c4a9dbdd9ed (diff)
downloadcoreutils-f940fece04ec0265ad8c52046f7678ad3116a305.tar.xz
ptx: fix whitespace trimming with multiple files
This issue was identified by running the test suite with http://code.google.com/p/address-sanitizer/ which is included in GCC 4.8 and enabled with -fsanitize=address This was checked on Fedora 20 with GCC 4.8 as follows: $ yum install libasan # http://bugzilla.redhat.com/991003 $ rm -f src/ptx.o $ make check AM_CFLAGS='-fsanitize=address' SUBDIRS=. VERBOSE=yes $ failure identified in tests/test-suite.log To see this particular failure triggered with multiple files: $ src/ptx <(echo a) <(echo a) 2>&1 | asan_symbolize.py -d ================================================================= ==32178==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x60200000e74f at pc 0x435442 bp 0x7fffe8a1b290 sp 0x7fffe8a1b288 READ of size 1 at 0x60200000e74f thread T0 #0 0x435441 in define_all_fields coreutils/src/ptx.c:1425 #1 0x7fa206d31d64 in __libc_start_main ??:? #2 0x42f77c in _start ??:? 0x60200000e74f is located 1 bytes to the left of 3-byte region [0x60200000e750,0x60200000e753) allocated by thread T0 here: #0 0x421809 in realloc ??:? #1 0x439b4e in fread_file coreutils/lib/read-file.c:97 Shadow bytes around the buggy address: 0x0c047fff9c90: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff9ca0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff9cb0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff9cc0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x0c047fff9cd0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fd fd =>0x0c047fff9ce0: fa fa 03 fa fa fa fd fd fa[fa]03 fa fa fa 00 00 0x0c047fff9cf0: fa fa 04 fa fa fa 04 fa fa fa fd fa fa fa fd fa 0x0c047fff9d00: fa fa 00 fa fa fa fd fa fa fa 00 fa fa fa 00 fa 0x0c047fff9d10: fa fa fd fa fa fa fd fa fa fa fd fa fa fa fd fa 0x0c047fff9d20: fa fa fd fa fa fa fd fa fa fa fd fa fa fa fd fa 0x0c047fff9d30: fa fa fd fa fa fa 00 fa fa fa 00 fa fa fa 00 fa Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Heap right redzone: fb Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack partial redzone: f4 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 ASan internal: fe ==32178==ABORTING The initial report and high level analysis were from Jim Meyering... "The underlying problem is that swallow_file_in_memory() is setting the contents of the global text_buffer for the first file, then updating it (clobbering old value) for the second file. Yet, some pointers to the initial buffer have been squirreled away and later, one of them (keyafter) is presumed to point into the new "text_buffer", which it does not. The subsequent SKIP_WHITE_BACKWARDS use backs up "cursor" and goes out of bounds." * src/ptx.c (text_buffers): Maintain references for the limits of each buffer corresponding to each file, rather than just the last processed. (struct OCCURS): Add a member to map back to the corresponding file. Note normally this could be computed from the "reference" member rather than needing the extra storage, however this is not possible when in --references mode. (find_occurs_in_text): Reference the array rather than a single entry. (define_all_fields): Likewise. Also avoid computing the file index since this is now stored directly. (main): Update text_buffers[] array rather than a single text_buffer. * tests/misc/ptx-overrun.sh: Even though this issue is already triggered with AddressSanitizer, add a new case to demonstrate the whitespace trimming issue, and to trigger without AddressSanitizer. Fixes https://bugs.gnu.org/16171
Diffstat (limited to 'src/ptx.c')
-rw-r--r--src/ptx.c69
1 files changed, 38 insertions, 31 deletions
diff --git a/src/ptx.c b/src/ptx.c
index be342eefc..d165e9665 100644
--- a/src/ptx.c
+++ b/src/ptx.c
@@ -166,7 +166,7 @@ static int total_line_count; /* total number of lines seen so far */
static const char **input_file_name; /* array of text input file names */
static int *file_line_count; /* array of 'total_line_count' values at end */
-static BLOCK text_buffer; /* file to study */
+static BLOCK *text_buffers; /* files to study */
/* SKIP_NON_WHITE used only for getting or skipping the reference. */
@@ -232,6 +232,7 @@ typedef struct
DELTA left; /* distance to left context start */
DELTA right; /* distance to right context end */
int reference; /* reference descriptor */
+ size_t file_index; /* corresponding file */
}
OCCURS;
@@ -744,7 +745,7 @@ digest_word_file (const char *file_name, WORD_TABLE *table)
`----------------------------------------------------------------------*/
static void
-find_occurs_in_text (void)
+find_occurs_in_text (size_t file_index)
{
char *cursor; /* for scanning the source text */
char *scan; /* for scanning the source text also */
@@ -760,6 +761,8 @@ find_occurs_in_text (void)
char *word_end; /* end of word */
char *next_context_start; /* next start of left context */
+ const BLOCK *text_buffer = &text_buffers[file_index];
+
/* reference_length is always used within 'if (input_reference)'.
However, GNU C diagnoses that it may be used uninitialized. The
following assignment is merely to shut it up. */
@@ -775,19 +778,19 @@ find_occurs_in_text (void)
found inside it. Also, unconditionally assigning these variable has
the happy effect of shutting up lint. */
- line_start = text_buffer.start;
+ line_start = text_buffer->start;
line_scan = line_start;
if (input_reference)
{
- SKIP_NON_WHITE (line_scan, text_buffer.end);
+ SKIP_NON_WHITE (line_scan, text_buffer->end);
reference_length = line_scan - line_start;
- SKIP_WHITE (line_scan, text_buffer.end);
+ SKIP_WHITE (line_scan, text_buffer->end);
}
/* Process the whole buffer, one line or one sentence at a time. */
- for (cursor = text_buffer.start;
- cursor < text_buffer.end;
+ for (cursor = text_buffer->start;
+ cursor < text_buffer->end;
cursor = next_context_start)
{
@@ -805,11 +808,11 @@ find_occurs_in_text (void)
This test also accounts for the case of an incomplete line or
sentence at the end of the buffer. */
- next_context_start = text_buffer.end;
+ next_context_start = text_buffer->end;
if (context_regex.string)
switch (re_search (&context_regex.pattern, cursor,
- text_buffer.end - cursor,
- 0, text_buffer.end - cursor, &context_regs))
+ text_buffer->end - cursor,
+ 0, text_buffer->end - cursor, &context_regs))
{
case -2:
matcher_error ();
@@ -915,7 +918,7 @@ find_occurs_in_text (void)
total_line_count++;
line_scan++;
line_start = line_scan;
- SKIP_NON_WHITE (line_scan, text_buffer.end);
+ SKIP_NON_WHITE (line_scan, text_buffer->end);
reference_length = line_scan - line_start;
}
else
@@ -956,7 +959,7 @@ find_occurs_in_text (void)
occurs_cursor = occurs_table[0] + number_of_occurs[0];
- /* Define the refence field, if any. */
+ /* Define the reference field, if any. */
if (auto_reference)
{
@@ -973,7 +976,7 @@ find_occurs_in_text (void)
total_line_count++;
line_scan++;
line_start = line_scan;
- SKIP_NON_WHITE (line_scan, text_buffer.end);
+ SKIP_NON_WHITE (line_scan, text_buffer->end);
}
else
line_scan++;
@@ -1007,6 +1010,7 @@ find_occurs_in_text (void)
occurs_cursor->key = possible_key;
occurs_cursor->left = context_start - possible_key.start;
occurs_cursor->right = context_end - possible_key.start;
+ occurs_cursor->file_index = file_index;
number_of_occurs[0]++;
}
@@ -1356,9 +1360,10 @@ define_all_fields (OCCURS *occurs)
char *left_context_start; /* start of left context */
char *right_context_end; /* end of right context */
char *left_field_start; /* conservative start for 'head'/'before' */
- int file_index; /* index in text input file arrays */
const char *file_name; /* file name for reference */
int line_ordinal; /* line ordinal for reference */
+ const char *buffer_start; /* start of buffered file for this occurs */
+ const char *buffer_end; /* end of buffered file for this occurs */
/* Define 'keyafter', start of left context and end of right context.
'keyafter' starts at the saved position for keyword and extend to the
@@ -1371,6 +1376,9 @@ define_all_fields (OCCURS *occurs)
left_context_start = keyafter.start + occurs->left;
right_context_end = keyafter.start + occurs->right;
+ buffer_start = text_buffers[occurs->file_index].start;
+ buffer_end = text_buffers[occurs->file_index].end;
+
cursor = keyafter.end;
while (cursor < right_context_end
&& cursor <= keyafter.start + keyafter_max_width)
@@ -1422,13 +1430,13 @@ define_all_fields (OCCURS *occurs)
if (truncation_string)
{
cursor = before.start;
- SKIP_WHITE_BACKWARDS (cursor, text_buffer.start);
+ SKIP_WHITE_BACKWARDS (cursor, buffer_start);
before_truncation = cursor > left_context_start;
}
else
before_truncation = 0;
- SKIP_WHITE (before.start, text_buffer.end);
+ SKIP_WHITE (before.start, buffer_end);
/* The tail could not take more columns than what has been left in the
left context field, and a gap is mandatory. It starts after the
@@ -1443,7 +1451,7 @@ define_all_fields (OCCURS *occurs)
if (tail_max_width > 0)
{
tail.start = keyafter.end;
- SKIP_WHITE (tail.start, text_buffer.end);
+ SKIP_WHITE (tail.start, buffer_end);
tail.end = tail.start;
cursor = tail.end;
@@ -1489,7 +1497,7 @@ define_all_fields (OCCURS *occurs)
if (head_max_width > 0)
{
head.end = before.start;
- SKIP_WHITE_BACKWARDS (head.end, text_buffer.start);
+ SKIP_WHITE_BACKWARDS (head.end, buffer_start);
head.start = left_field_start;
while (head.start + head_max_width < head.end)
@@ -1520,21 +1528,16 @@ define_all_fields (OCCURS *occurs)
{
/* Construct the reference text in preallocated space from the file
- name and the line number. Find out in which file the reference
- occurred. Standard input yields an empty file name. Insure line
- numbers are one based, even if they are computed zero based. */
-
- file_index = 0;
- while (file_line_count[file_index] < occurs->reference)
- file_index++;
+ name and the line number. Standard input yields an empty file name.
+ Ensure line numbers are 1 based, even if they are computed 0 based. */
- file_name = input_file_name[file_index];
+ file_name = input_file_name[occurs->file_index];
if (!file_name)
file_name = "";
line_ordinal = occurs->reference + 1;
- if (file_index > 0)
- line_ordinal -= file_line_count[file_index - 1];
+ if (occurs->file_index > 0)
+ line_ordinal -= file_line_count[occurs->file_index - 1];
sprintf (reference.start, "%s:%d", file_name, line_ordinal);
reference.end = reference.start + strlen (reference.start);
@@ -2034,6 +2037,7 @@ main (int argc, char **argv)
input_file_name = xmalloc (sizeof *input_file_name);
file_line_count = xmalloc (sizeof *file_line_count);
+ text_buffers = xmalloc (sizeof *text_buffers);
number_input_files = 1;
input_file_name[0] = NULL;
}
@@ -2042,6 +2046,7 @@ main (int argc, char **argv)
number_input_files = argc - optind;
input_file_name = xmalloc (number_input_files * sizeof *input_file_name);
file_line_count = xmalloc (number_input_files * sizeof *file_line_count);
+ text_buffers = xmalloc (number_input_files * sizeof *text_buffers);
for (file_index = 0; file_index < number_input_files; file_index++)
{
@@ -2060,6 +2065,7 @@ main (int argc, char **argv)
number_input_files = 1;
input_file_name = xmalloc (sizeof *input_file_name);
file_line_count = xmalloc (sizeof *file_line_count);
+ text_buffers = xmalloc (sizeof *text_buffers);
if (!*argv[optind] || STREQ (argv[optind], "-"))
input_file_name[0] = NULL;
else
@@ -2126,11 +2132,12 @@ main (int argc, char **argv)
for (file_index = 0; file_index < number_input_files; file_index++)
{
+ BLOCK *text_buffer = text_buffers + file_index;
- /* Read the file in core, than study it. */
+ /* Read the file in core, then study it. */
- swallow_file_in_memory (input_file_name[file_index], &text_buffer);
- find_occurs_in_text ();
+ swallow_file_in_memory (input_file_name[file_index], text_buffer);
+ find_occurs_in_text (file_index);
/* Maintain for each file how many lines has been read so far when its
end is reached. Incrementing the count first is a simple kludge to