summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/tr.c142
1 files changed, 96 insertions, 46 deletions
diff --git a/src/tr.c b/src/tr.c
index a5b68106b..479d3d367 100644
--- a/src/tr.c
+++ b/src/tr.c
@@ -1177,6 +1177,78 @@ card_of_complement (struct Spec_list *s)
return cardinality;
}
+/* Discard the lengths associated with a case conversion,
+ as using the actual number of upper or lower case characters
+ is problematic when they don't match in some locales.
+ Also ensure the case conversion classes in string2 are
+ aligned correctly with those in string1.
+ Note POSIX says the behavior of `tr "[:upper:]" "[:upper:]"'
+ is undefined. Therefore we allow it (unlike Solaris)
+ and treat it as a no-op. */
+
+static void
+validate_case_classes (struct Spec_list *s1, struct Spec_list *s2)
+{
+ size_t n_upper = 0;
+ size_t n_lower = 0;
+ unsigned int i;
+ int c1 = 0;
+ int c2 = 0;
+ count old_s1_len = s1->length;
+ count old_s2_len = s2->length;
+ struct List_element *s1_tail = s1->tail;
+ struct List_element *s2_tail = s2->tail;
+ bool s1_new_element = true;
+ bool s2_new_element = true;
+
+ if (!s2->has_char_class)
+ return;
+
+ for (i = 0; i < N_CHARS; i++)
+ {
+ if (isupper (i))
+ n_upper++;
+ if (islower (i))
+ n_lower++;
+ }
+
+ s1->state = BEGIN_STATE;
+ s2->state = BEGIN_STATE;
+
+ while (c1 != -1 && c2 != -1)
+ {
+ enum Upper_Lower_class class_s1, class_s2;
+
+ c1 = get_next (s1, &class_s1);
+ c2 = get_next (s2, &class_s2);
+
+ /* If c2 transitions to a new case class, then
+ c1 must also transition at the same time. */
+ if (s2_new_element && class_s2 != UL_NONE
+ && !(s1_new_element && class_s1 != UL_NONE))
+ error (EXIT_FAILURE, 0,
+ _("misaligned [:upper:] and/or [:lower:] construct"));
+
+ /* If case converting, quickly skip over the elements. */
+ if (class_s2 != UL_NONE)
+ {
+ skip_construct (s1);
+ skip_construct (s2);
+ /* Discount insignificant/problematic lengths. */
+ s1->length -= (class_s1 == UL_UPPER ? n_upper : n_lower) - 1;
+ s2->length -= (class_s2 == UL_UPPER ? n_upper : n_lower) - 1;
+ }
+
+ s1_new_element = s1->state == NEW_ELEMENT; /* Next element is new. */
+ s2_new_element = s2->state == NEW_ELEMENT; /* Next element is new. */
+ }
+
+ assert (old_s1_len >= s1->length && old_s2_len >= s2->length);
+
+ s1->tail = s1_tail;
+ s2->tail = s2_tail;
+}
+
/* Gather statistics about the spec-list S in preparation for the tests
in validate that determine the consistency of the specs. This function
is called at most twice; once for string1, and again for any string2.
@@ -1318,20 +1390,14 @@ parse_str (char const *s, struct Spec_list *spec_list)
Upon successful completion, S2->length is set to S1->length. The only
way this function can fail to make S2 as long as S1 is when S2 has
zero-length, since in that case, there is no last character to repeat.
- So S2->length is required to be at least 1.
+ So S2->length is required to be at least 1. */
- Providing this functionality allows the user to do some pretty
- non-BSD (and non-portable) things: For example, the command
- tr -cs '[:upper:]0-9' '[:lower:]'
- is almost guaranteed to give results that depend on your collating
- sequence. */
static void
string2_extend (const struct Spec_list *s1, struct Spec_list *s2)
{
struct List_element *p;
unsigned char char_to_repeat;
- int i;
assert (translating);
assert (s1->length > s2->length);
@@ -1347,11 +1413,13 @@ string2_extend (const struct Spec_list *s1, struct Spec_list *s2)
char_to_repeat = p->u.range.last_char;
break;
case RE_CHAR_CLASS:
- for (i = N_CHARS - 1; i >= 0; i--)
- if (is_char_class_member (p->u.char_class, i))
- break;
- assert (i >= 0);
- char_to_repeat = i;
+ /* Note BSD allows extending of classes in string2. For example:
+ tr '[:upper:]0-9' '[:lower:]'
+ That's not portable however, contradicts POSIX and is dependent
+ on your collating sequence. */
+ error (EXIT_FAILURE, 0,
+ _("when translating with string1 longer than string2,\n\
+the latter string must not end with a character class"));
break;
case RE_REPEATED_CHAR:
@@ -1431,6 +1499,15 @@ validate (struct Spec_list *s1, struct Spec_list *s2)
when translating"));
}
+ if (s2->has_restricted_char_class)
+ {
+ error (EXIT_FAILURE, 0,
+ _("when translating, the only character classes that may \
+appear in\nstring2 are `upper' and `lower'"));
+ }
+
+ validate_case_classes (s1, s2);
+
if (s1->length > s2->length)
{
if (!truncate_set1)
@@ -1452,13 +1529,6 @@ when translating"));
_("when translating with complemented character classes,\
\nstring2 must map all characters in the domain to one"));
}
-
- if (s2->has_restricted_char_class)
- {
- error (EXIT_FAILURE, 0,
- _("when translating, the only character classes that may \
-appear in\nstring2 are `upper' and `lower'"));
- }
}
else
/* Not translating. */
@@ -1812,7 +1882,6 @@ main (int argc, char **argv)
{
int c1, c2;
int i;
- bool case_convert = false;
enum Upper_Lower_class class_s1;
enum Upper_Lower_class class_s2;
@@ -1822,47 +1891,21 @@ main (int argc, char **argv)
s2->state = BEGIN_STATE;
while (true)
{
- /* When the previous pair identified case-converting classes,
- advance S1 and S2 so that each points to the following
- construct. */
- if (case_convert)
- {
- skip_construct (s1);
- skip_construct (s2);
- case_convert = false;
- }
-
c1 = get_next (s1, &class_s1);
c2 = get_next (s2, &class_s2);
- /* When translating and there is an [:upper:] or [:lower:]
- class in SET2, then there must be a corresponding [:lower:]
- or [:upper:] class in SET1. */
- if (class_s1 == UL_NONE
- && (class_s2 == UL_LOWER || class_s2 == UL_UPPER))
- error (EXIT_FAILURE, 0,
- _("misaligned [:upper:] and/or [:lower:] construct"));
-
if (class_s1 == UL_LOWER && class_s2 == UL_UPPER)
{
- case_convert = true;
for (i = 0; i < N_CHARS; i++)
if (islower (i))
xlate[i] = toupper (i);
}
else if (class_s1 == UL_UPPER && class_s2 == UL_LOWER)
{
- case_convert = true;
for (i = 0; i < N_CHARS; i++)
if (isupper (i))
xlate[i] = tolower (i);
}
- else if ((class_s1 == UL_LOWER && class_s2 == UL_LOWER)
- || (class_s1 == UL_UPPER && class_s2 == UL_UPPER))
- {
- /* POSIX says the behavior of `tr "[:upper:]" "[:upper:]"'
- is undefined. Treat it as a no-op. */
- }
else
{
/* The following should have been checked by validate... */
@@ -1870,6 +1913,13 @@ main (int argc, char **argv)
break;
xlate[c1] = c2;
}
+
+ /* When case-converting, skip the elements as an optimization. */
+ if (class_s2 != UL_NONE)
+ {
+ skip_construct (s1);
+ skip_construct (s2);
+ }
}
assert (c1 == -1 || truncate_set1);
}