diff options
author | Jim Meyering <jim@meyering.net> | 1999-05-16 16:18:46 +0000 |
---|---|---|
committer | Jim Meyering <jim@meyering.net> | 1999-05-16 16:18:46 +0000 |
commit | 108ff5810f63e4f022d202aed4c24edd484381d1 (patch) | |
tree | 93bb0cb17b1f94e115e34435252d15f342f88ecc /src/sort.c | |
parent | 4b11c1f7347cd9299cfa1496a22d84dffec12db4 (diff) | |
download | coreutils-108ff5810f63e4f022d202aed4c24edd484381d1.tar.xz |
Don't autodetect the locale of numbers and
months, as this conflicts with POSIX.2 and is tricky to boot.
(FLOATING_COMMA, NLS_STRNCMP, NLS_MAX_GROUPS,
NLS_ONE_CHARACTER_STRING): Remove macros no longer used.
(nls_grouping, nls_fraction_found, nls_month_found, nls_monthtab,
nls_months_collide, nls_keyhead, us_monthtab): Remove variables no
longer used.
(struct nls_keyfield): Remove type no longer used.
(strncoll_s2_readonly, nls_set_fraction, look_for_fraction,
nls_month_is_either_locale, nls_numeric_format): Remove functions no
longer used.
(monthtab): Now has the role that us_monthtab had, but it's const only
if ENABLE_NLS is not defined.
(C_DECIMAL_POINT): Renamed from FLOATING_POINT. All uses changed.
(MONTHS_PER_YEAR): Renamed from NLS_NUM_MONTHS. All uses changed.
(struct_month_cmp): Renamed from nls_sort_month_comp. All uses changed.
Use strcmp, not strcoll, since the user doesn't care about collating
here.
(inittables): Read locale data into monthtab, rather than modifying a
separate month table and futzing with indirection. Do not worry about
colliding months, since we no longer autodetect month locale.
(fraccompare): Don't set no-longer-used variable nls_fraction_found.
(getmonth): Use strncmp to compare months, since user doesn't care
about collating here. Fix bug where code incorrectly assumed that
strlen (monthtab[lo].name) == strlen (monthtab[ix].name).
(keycompare, main): Don't autodetect month locale.
(compare): Don't use NLS_MEMCMP in code that can't be executed if
need_locale is false, as NLS_MEMCMP is equivalent to memcmp in that
case.
(sort, insertkey, main): Don't autodetect numeric locale.
Diffstat (limited to 'src/sort.c')
-rw-r--r-- | src/sort.c | 447 |
1 files changed, 36 insertions, 411 deletions
diff --git a/src/sort.c b/src/sort.c index a299a0b19..5ec89a5e3 100644 --- a/src/sort.c +++ b/src/sort.c @@ -70,58 +70,33 @@ char *xstrdup (); status code greater than 1. */ #define SORT_FAILURE 2 -#define FLOATING_POINT '.' -#define FLOATING_COMMA ',' +#define C_DECIMAL_POINT '.' #define NEGATION_SIGN '-' #define NUMERIC_ZERO '0' #ifdef ENABLE_NLS # define NLS_MEMCMP(S1, S2, Len) strncoll (S1, S2, Len) -# define NLS_STRNCMP(S1, S2, Len) strncoll_s2_readonly (S1, S2, Len) #else # define NLS_MEMCMP(S1, S2, Len) memcmp (S1, S2, Len) -# define NLS_STRNCMP(S1, S2, Len) strncmp (S1, S2, Len) #endif #ifdef ENABLE_NLS static char decimal_point; static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */ -static char *nls_grouping; /* This is "C" locale, need another? */ static int need_locale = 0; -/* Should we look for decimal point? */ -static int nls_fraction_found = 1; - -/* Look for month notations in text? */ -static int nls_month_found = 1; - # define IS_THOUSANDS_SEP(x) ((x) == th_sep) #else -# define decimal_point FLOATING_POINT +# define decimal_point C_DECIMAL_POINT # define IS_THOUSANDS_SEP(x) 0 #endif -/* If native language support is requested, make a 1-1 map to the - locale character map, otherwise ensure normal behavior. */ -#ifdef ENABLE_NLS - -/* 12 months in a year */ -# define NLS_NUM_MONTHS 12 - -/* Maximum number of elements, to allocate per allocation unit */ -# define NLS_MAX_GROUPS 8 - -/* A string with one character, to enforce char collation */ -# define NLS_ONE_CHARACTER_STRING " " - -#endif - /* The kind of blanks for '-b' to skip in various options. */ enum blanktype { bl_start, bl_end, bl_both }; @@ -193,12 +168,21 @@ static int nonprinting[UCHAR_LIM]; /* Table of non-dictionary characters (not letters, digits, or blanks). */ static int nondictionary[UCHAR_LIM]; -/* Translation table folding lower case to upper. */ +/* Translation table folding lower case to upper. + FIXME: This doesn't work with multibyte character sets. 
*/ static char fold_toupper[UCHAR_LIM]; -/* Table mapping 3-letter month names to integers. +#define MONTHS_PER_YEAR 12 + +#ifndef ENABLE_NLS +# define NLS_CONST const +#else +# define NLS_CONST /* empty */ +#endif + +/* Table mapping month names to integers. Alphabetic order allows binary search. */ -static const struct month us_monthtab[] = +static NLS_CONST struct month monthtab[] = { {"APR", 4}, {"AUG", 8}, @@ -214,26 +198,6 @@ static const struct month us_monthtab[] = {"SEP", 9} }; -#ifdef ENABLE_NLS - -/* Locale may have a different idea of month names */ -static struct month nls_monthtab[NLS_NUM_MONTHS]; -static int nls_months_collide[NLS_NUM_MONTHS + 1]; - -/* Numeric keys, to search for numeric format */ -struct nls_keyfield -{ - struct keyfield *key; - struct nls_keyfield *next; -}; - -static struct nls_keyfield *nls_keyhead = NULL; - -#endif - -/* Which month table to use in the program, default C */ -static const struct month *monthtab = us_monthtab; - /* During the merge phase, the number of files to merge at once. */ #define NMERGE 16 @@ -502,10 +466,10 @@ zaptemp (const char *name) /* Initialize the character class tables. */ static int -nls_sort_month_comp (const void *m1, const void *m2) +struct_month_cmp (const void *m1, const void *m2) { - return strcoll (((const struct month *) m1)->name, - ((const struct month *) m2)->name); + return strcmp (((const struct month *) m1)->name, + ((const struct month *) m2)->name); } /* Do collation on strings S1 and S2, but for at most L characters. @@ -535,35 +499,6 @@ strncoll (char *s1, char *s2, int len) return diff; } -/* Do collation on strings S1 and S2, but for at most L characters. - Use the fact, that we KNOW that S2 is the shorter string and has - length LEN. 
*/ -static int -strncoll_s2_readonly (char *s1, const char *s2, int len) -{ - register int diff; - - assert (len == strlen (s2)); - assert (len <= strlen (s1)); - - if (need_locale) - { - /* Emulate a strncoll function, by forcing strcoll to compare - only the first LEN characters in each string. */ - register unsigned char n1 = s1[len]; - - s1[len] = 0; - diff = strcoll (s1, s2); - s1[len] = n1; - } - else - { - diff = memcmp (s1, s2, len); - } - - return diff; -} - #endif /* NLS */ static void @@ -586,45 +521,27 @@ inittables (void) } #if defined ENABLE_NLS && HAVE_NL_LANGINFO - /* If We're not in the "C" locale, read in different names for months. */ + /* If we're not in the "C" locale, read different names for months. */ if (need_locale) { - nls_months_collide[0] = 1; /* if an error, look again */ - for (i = 0; i < NLS_NUM_MONTHS; i++) + for (i = 0; i < MONTHS_PER_YEAR; i++) { char *s; size_t s_len; - int j; + size_t j; + char *name; - s = (char *) nl_langinfo (ABMON_1 + us_monthtab[i].val - 1); + s = (char *) nl_langinfo (ABMON_1 + i); s_len = strlen (s); - nls_monthtab[i].name = (char *) xmalloc (s_len + 1); - nls_monthtab[i].val = us_monthtab[i].val; + monthtab[i].name = name = (char *) xmalloc (s_len + 1); + monthtab[i].val = i + 1; - /* Be careful: abreviated month names - may be longer than the usual 3 characters. */ for (j = 0; j < s_len; j++) - nls_monthtab[i].name[j] = fold_toupper[UCHAR (s[j])]; - nls_monthtab[i].name[j] = '\0'; - - nls_months_collide[nls_monthtab[i].val] = 0; - for (j = 0; j < NLS_NUM_MONTHS; ++j) - { - if (STREQ (nls_monthtab[i].name, us_monthtab[i].name)) - { - /* There are indeed some month names in English which - collide with the NLS name. */ - nls_months_collide[nls_monthtab[i].val] = 1; - break; - } - } + name[j] = fold_toupper[UCHAR (s[j])]; + name[j] = '\0'; } - /* Now quicksort the month table (should be sorted already!). - However, another locale doesn't rule out the possibility - of a different order of month names. 
*/ - qsort ((void *) nls_monthtab, NLS_NUM_MONTHS, - sizeof (struct month), nls_sort_month_comp); - monthtab = nls_monthtab; + qsort ((void *) monthtab, MONTHS_PER_YEAR, + sizeof (struct month), struct_month_cmp); } #endif /* NLS */ } @@ -948,10 +865,6 @@ findlines (struct buffer *buf, struct lines *lines) static int fraccompare (register const char *a, register const char *b) { -#ifdef ENABLE_NLS - nls_fraction_found = 1; -#endif - if (*a == decimal_point && *b == decimal_point) { while (*++a == *++b) @@ -986,160 +899,6 @@ fraccompare (register const char *a, register const char *b) machine numbers. Comparatively slow for short strings, but asymptotically hideously fast. */ -/* The code here, is like the above... continuous reoccurrance of the - same code... improved 15-JAN-1997 in connection with native languages - support */ - -#ifdef ENABLE_NLS - -/* Decide the kind of fraction the program will use */ -static void -nls_set_fraction (char ch) -{ - if (!nls_fraction_found && ch != decimal_point) - { - if (ch == FLOATING_POINT) - { /* US style */ - decimal_point = FLOATING_POINT; - th_sep = FLOATING_COMMA; - } - else if (ch == FLOATING_COMMA) - { /* EU style */ - decimal_point = FLOATING_COMMA; - th_sep = FLOATING_POINT; - } - else if (ch != decimal_point) - { /* Alien */ - decimal_point = ch; - th_sep = CHAR_MAX + 1; - } - } - nls_fraction_found = 1; -} - -/* Look for a fraction - It isn't as simple as it looks... however, consider a number: - 1.234,00 - 1,234.00 - It's easy to tell which is a decimal point, and which isn't. We use - the grouping information to find out how many digits are grouped together - for thousand separator. - - The idea here, is to use the grouping information... but not to - spend time with verifying the groups... not too much time, anyway. - so, a number represented to us as: - 1.234.567,89 - will be taken and separated into different groups, separated by a - separator character (Decimal point or thousands separator). 
- {1,234,567} - these are the groups of digits that lead to a separator character, - and with the trailing group is added: - {1,234,567,89} - resulting in 4 groups of numbers. If the resulting number of groups, - are none, or just 1... this is not enough to decide anything about - the decimal point. We need at least two for that. With two groups - we have at least one separator. That separator can be a decimal - point, or a thousands separator... if it is a thousands separator - the number of digits in the last group, will comply with the first - rule in the grouping rule for numeric values. i.e. - |{89}| = grouping[0] - if so, and there are only two groups of numbers, the value cannot - be determined. If there are three or more numbers, the separator - separating the groups is checked. If these are the same, the - character is determined to be a thousands separator. If they are - not the same, the last separator is determined to be a decimal - point. If checking the grouping rules, we find out that there - are no grouping rules defined, either the grouping rules is NULL - or the first grouping number is 0, then the locale format is used. - - We try to take an advantage of a special situation. If the trailing - group, the one that normally should be the fractional part, turns - out to have the same length as the thousands separator rule says, - making a doubt on that it may be a decimal point, we look for the - group before that, i.e. with a two group form: - {1234,567} - where the grouping rule is 3;3... we take a look at group 1, and find - out that |{1234}| > larger of the two first grouping rules, then - the separator has to be a decimal point... 
- */ - -static void -look_for_fraction (const char *s, const char *e) -{ - register const char *p; - register unsigned short n = 0; - static unsigned short max_groups = 0; - static unsigned short *groups = NULL; - - if (groups == NULL) - { - max_groups = NLS_MAX_GROUPS; - groups = (unsigned short *) xmalloc (sizeof (*groups) * max_groups); - } - - /* skip blanks and signs */ - while (blanks[UCHAR (*s)] || *s == NEGATION_SIGN) - s++; - /* groups = {}, n = 0 */ - for (p = s; p < e; p++) - { - /* groups[n]={number of digits leading to separator n} - n = number of separators so far */ - if (*p == decimal_point || *p == th_sep || *p == FLOATING_POINT) - { - if (++n >= max_groups) - { - /* BIG Number... enlarge table */ - max_groups += NLS_MAX_GROUPS; - groups = (unsigned short *) xrealloc ((char *) groups, - (sizeof (*groups) - * max_groups)); - } - groups[n] = (unsigned short) (p - s); - s = p + 1; - } - else if (!ISDIGIT (*p)) - break; - /* mem[s..p]=digits only */ - } - /* n = number of separators in s..e */ - groups[++n] = (short) (p - s); - /* n = groups in the number */ - if (n <= 1) - return; /* Only one group of numbers... 
not enough */ - p = nls_grouping; - /* p = address of group rules - s = address of next character after separator */ - s = s - 1; /* s = address of last separator */ - if (p && *p) - { - /* a legal trailing group, iff groups[n] == first rule */ - if (groups[n] != (short) *p) - nls_set_fraction (*s); - else - { - if (n == 2) - { /* Only two groups */ - if (groups[n - 1] > max (p[0], p[1])) - nls_set_fraction (*s); - return; - } - /* if the separators are the same, it's a thousands */ - if (*s != *(s - groups[n])) - nls_set_fraction (*s); - /* s[0] = thousands separator */ - else if (*s == th_sep) - nls_fraction_found = 1; - } - } - else - { - /* no grouping allowed here, last separator IS decimal point */ - nls_set_fraction (*s); - } -} -#endif - static int numcompare (register const char *a, register const char *b) { @@ -1303,7 +1062,7 @@ static int getmonth (const char *s, int len) { char *month; - register int i, lo = 0, hi = 12, result; + register int i, lo = 0, hi = MONTHS_PER_YEAR, result; while (len > 0 && blanks[UCHAR (*s)]) { @@ -1325,38 +1084,19 @@ getmonth (const char *s, int len) { int ix = (lo + hi) / 2; - len = strlen (monthtab[ix].name); - if (NLS_STRNCMP (month, monthtab[ix].name, len) < 0) + if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0) hi = ix; else lo = ix; } while (hi - lo > 1); - result = (!strncmp (month, monthtab[lo].name, len) ? monthtab[lo].val : 0); + result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name)) + ? 
monthtab[lo].val : 0); return result; } -#ifdef ENABLE_NLS -/* Look for the month in locale table, and if that fails try with - us month name table */ -static int -nls_month_is_either_locale (const char *s, int len) -{ - int ind; - - monthtab = nls_monthtab; - ind = getmonth (s, len); - if (ind == 0) - { - monthtab = us_monthtab; - ind = getmonth (s, len); - } - return ind; -} -#endif - /* Compare two lines A and B trying every key in sequence until there are no more keys or a difference is found. */ @@ -1460,32 +1200,7 @@ keycompare (const struct line *a, const struct line *b) } else if (key->month) { -#ifdef ENABLE_NLS - - /* if we haven't decided which locale to go with, we get the - month name from either. If either month name is fully - solved and the month name doesn't collide with the other - locale... then use that table from there forward */ - if (!nls_month_found) - { - int x; - - x = nls_month_is_either_locale (texta, lena); - nls_month_found = !nls_months_collide[x]; - if (nls_month_found) - { - diff = x - getmonth (textb, lenb); - } - else - { - diff = nls_month_is_either_locale (textb, lenb); - nls_month_found = !nls_months_collide[diff]; - diff = x - diff; - } - } - else -#endif - diff = getmonth (texta, lena) - getmonth (textb, lenb); + diff = getmonth (texta, lena) - getmonth (textb, lenb); if (diff) return key->reverse ? -diff : diff; continue; @@ -1658,7 +1373,7 @@ compare (register const struct line *a, register const struct line *b) diff = UCHAR (*ap) - UCHAR (*bp); if (diff == 0) { - diff = NLS_MEMCMP (ap, bp, mini); + diff = memcmp (ap, bp, mini); if (diff == 0) diff = tmpa - tmpb; } @@ -1935,65 +1650,6 @@ mergefps (FILE **fps, register int nfps, FILE *ofp) } } -#ifdef ENABLE_NLS - -/* Find the numeric format that this file represents to us for sorting. 
*/ -static void -nls_numeric_format (const struct line *line, int nlines) -{ - struct nls_keyfield *n_key = nls_keyhead; - - /* line = first line, nlines = number of lines, - nls_fraction_found = false */ - for (; !nls_fraction_found && nlines > 0; line++, nlines--) - { - int iter; - for (iter = 0; !nls_fraction_found; iter++) - { - char *text; - char *lim; - struct keyfield *key = n_key->key; - - /* text = {}, lim = {}, key = first key */ - if (iter || line->keybeg == NULL) - { - /* Succeding keys, where the key field is - specified */ - if (key->eword >= 0) /* key->eword = length of key */ - lim = limfield (line, key); - else - lim = line->text + line->length; - /* lim = end of key field */ - - if (key->sword >= 0) /* key->sword = start of key */ - text = begfield (line, key); - else - text = line->text; - /* text = start of field */ - } - else - { - /* First key is always the whole line */ - text = line->keybeg; - lim = line->keylim; - } - /* text = start of text to sort - lim = end of text to sort */ - - look_for_fraction (text, lim); - - /* nls_fraction_found = decimal_point found? */ - - if ((n_key = n_key->next) == nls_keyhead) - break; /* No more keys for this line */ - } - } - nls_fraction_found = 1; - /* decide on current decimal_point known */ -} - -#endif - /* Sort the array LINES with NLINES members, using TEMP for temporary space. 
*/ static void @@ -2132,12 +1788,6 @@ sort (char **files, int nfiles, FILE *ofp) tmp = (struct line *) xrealloc ((char *) tmp, ntmp * sizeof (struct line)); } -#ifdef ENABLE_NLS - if (nls_keyhead) - nls_keyhead = nls_keyhead->next; - if (!nls_fraction_found && nls_keyhead) - nls_numeric_format (lines.lines, lines.used); -#endif sortlines (lines.lines, lines.used, tmp); if (feof (fp) && !nfiles && !n_temp_files && !buf.left) { @@ -2187,23 +1837,6 @@ insertkey (struct keyfield *key) k = k->next; k->next = key; key->next = NULL; -#ifdef ENABLE_NLS - if (key->numeric || key->general_numeric) - { - struct nls_keyfield *nk; - - nk = (struct nls_keyfield *) xmalloc (sizeof (struct nls_keyfield)); - nk->key = key; - if (nls_keyhead) - { - nk->next = nls_keyhead->next; - nls_keyhead->next = nk; - } - else - nk->next = nk; - nls_keyhead = nk; - } -#endif } static void @@ -2347,26 +1980,18 @@ main (int argc, char **argv) struct lconv *lconvp = localeconv (); /* If the locale doesn't define a decimal point, or if the decimal - point is multibyte, use the US notation. We don't support + point is multibyte, use the C decimal point. We don't support multibyte decimal points yet. */ decimal_point = *lconvp->decimal_point; if (! decimal_point || lconvp->decimal_point[1]) - decimal_point = FLOATING_POINT; - else - nls_fraction_found = 0; /* Figure out which decimal point to use */ + decimal_point = C_DECIMAL_POINT; /* We don't support multibyte thousands separators yet. */ th_sep = *lconvp->thousands_sep; if (! th_sep || lconvp->thousands_sep[1]) th_sep = CHAR_MAX + 1; - - nls_grouping = (char *) (lconvp->grouping); } - nls_month_found = 0; /* Figure out which month notation to use */ - - monthtab = nls_monthtab; - #endif /* NLS */ bindtextdomain (PACKAGE, LOCALEDIR); |