summaryrefslogtreecommitdiff
path: root/src/sort.c
diff options
context:
space:
mode:
authorJim Meyering <jim@meyering.net>1999-05-16 16:18:46 +0000
committerJim Meyering <jim@meyering.net>1999-05-16 16:18:46 +0000
commit108ff5810f63e4f022d202aed4c24edd484381d1 (patch)
tree93bb0cb17b1f94e115e34435252d15f342f88ecc /src/sort.c
parent4b11c1f7347cd9299cfa1496a22d84dffec12db4 (diff)
downloadcoreutils-108ff5810f63e4f022d202aed4c24edd484381d1.tar.xz
Don't autodetect the locale of numbers and
months, as this conflicts with POSIX.2 and is tricky to boot. (FLOATING_COMMA, NLS_STRNCMP, NLS_MAX_GROUPS, NLS_ONE_CHARACTER_STRING): Remove macros no longer used. (nls_grouping, nls_fraction_found, nls_month_found, nos_monthtab, nls_months_collide, nls_keyhead, us_monthtab): Remove variables no longer used. (struct nls_keyfield): Remove types no longer used. (strncoll_s2_readonly, nls_set_fraction, look_for_fraction, nls_month_is_either_locale, nls_numeric_format): Remove functions no longer used. (monthtab): Now has the role that us_monthtab had, but it's const only if ENABLE_NLS is not defined. (C_DECIMAL_POINT): Renamed from FLOATING_POINT. All uses changed. (MONTHS_PER_YEAR): Renamed from NLS_NUM_MONTHS. All uses changed. (struct_month_cmp): Renamed from nls_sort_month_comp. All uses changed. Use strcmp, not strcoll, since the user doesn't care about collating here. (inittables): Read locale data into monthtab, rather than modifying a separate month table and futzing with indirection. Do not worry about colliding months, since we no longer autodetect month locale. (fraccompare): Don't set no-longer-used variable nls_fraction_found. (getmonth): Use strncmp to compare months, since user doesn't care about collating here. Fix bug where code incorrectly assumed that strlen (monthtab[lo].name) == strlen (monthtab[ix].name). (keycompare, main): Don't autodetect month locale. (compare): Don't use NLS_MEMCP in code that can't be executed if need_locale is false, as NLS_MEMCP is equivalent to memcmp in that case. (sort, insertkey, main): Don't autodetect numeric locale.
Diffstat (limited to 'src/sort.c')
-rw-r--r--src/sort.c447
1 files changed, 36 insertions, 411 deletions
diff --git a/src/sort.c b/src/sort.c
index a299a0b19..5ec89a5e3 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -70,58 +70,33 @@ char *xstrdup ();
status code greater than 1. */
#define SORT_FAILURE 2
-#define FLOATING_POINT '.'
-#define FLOATING_COMMA ','
+#define C_DECIMAL_POINT '.'
#define NEGATION_SIGN '-'
#define NUMERIC_ZERO '0'
#ifdef ENABLE_NLS
# define NLS_MEMCMP(S1, S2, Len) strncoll (S1, S2, Len)
-# define NLS_STRNCMP(S1, S2, Len) strncoll_s2_readonly (S1, S2, Len)
#else
# define NLS_MEMCMP(S1, S2, Len) memcmp (S1, S2, Len)
-# define NLS_STRNCMP(S1, S2, Len) strncmp (S1, S2, Len)
#endif
#ifdef ENABLE_NLS
static char decimal_point;
static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
-static char *nls_grouping;
/* This is "C" locale, need another? */
static int need_locale = 0;
-/* Should we look for decimal point? */
-static int nls_fraction_found = 1;
-
-/* Look for month notations in text? */
-static int nls_month_found = 1;
-
# define IS_THOUSANDS_SEP(x) ((x) == th_sep)
#else
-# define decimal_point FLOATING_POINT
+# define decimal_point C_DECIMAL_POINT
# define IS_THOUSANDS_SEP(x) 0
#endif
-/* If native language support is requested, make a 1-1 map to the
- locale character map, otherwise ensure normal behavior. */
-#ifdef ENABLE_NLS
-
-/* 12 months in a year */
-# define NLS_NUM_MONTHS 12
-
-/* Maximum number of elements, to allocate per allocation unit */
-# define NLS_MAX_GROUPS 8
-
-/* A string with one character, to enforce char collation */
-# define NLS_ONE_CHARACTER_STRING " "
-
-#endif
-
/* The kind of blanks for '-b' to skip in various options. */
enum blanktype { bl_start, bl_end, bl_both };
@@ -193,12 +168,21 @@ static int nonprinting[UCHAR_LIM];
/* Table of non-dictionary characters (not letters, digits, or blanks). */
static int nondictionary[UCHAR_LIM];
-/* Translation table folding lower case to upper. */
+/* Translation table folding lower case to upper.
+ FIXME: This doesn't work with multibyte character sets. */
static char fold_toupper[UCHAR_LIM];
-/* Table mapping 3-letter month names to integers.
+#define MONTHS_PER_YEAR 12
+
+#ifndef ENABLE_NLS
+# define NLS_CONST const
+#else
+# define NLS_CONST /* empty */
+#endif
+
+/* Table mapping month names to integers.
Alphabetic order allows binary search. */
-static const struct month us_monthtab[] =
+static NLS_CONST struct month monthtab[] =
{
{"APR", 4},
{"AUG", 8},
@@ -214,26 +198,6 @@ static const struct month us_monthtab[] =
{"SEP", 9}
};
-#ifdef ENABLE_NLS
-
-/* Locale may have a different idea of month names */
-static struct month nls_monthtab[NLS_NUM_MONTHS];
-static int nls_months_collide[NLS_NUM_MONTHS + 1];
-
-/* Numeric keys, to search for numeric format */
-struct nls_keyfield
-{
- struct keyfield *key;
- struct nls_keyfield *next;
-};
-
-static struct nls_keyfield *nls_keyhead = NULL;
-
-#endif
-
-/* Which month table to use in the program, default C */
-static const struct month *monthtab = us_monthtab;
-
/* During the merge phase, the number of files to merge at once. */
#define NMERGE 16
@@ -502,10 +466,10 @@ zaptemp (const char *name)
/* Initialize the character class tables. */
static int
-nls_sort_month_comp (const void *m1, const void *m2)
+struct_month_cmp (const void *m1, const void *m2)
{
- return strcoll (((const struct month *) m1)->name,
- ((const struct month *) m2)->name);
+ return strcmp (((const struct month *) m1)->name,
+ ((const struct month *) m2)->name);
}
/* Do collation on strings S1 and S2, but for at most L characters.
@@ -535,35 +499,6 @@ strncoll (char *s1, char *s2, int len)
return diff;
}
-/* Do collation on strings S1 and S2, but for at most L characters.
- Use the fact, that we KNOW that S2 is the shorter string and has
- length LEN. */
-static int
-strncoll_s2_readonly (char *s1, const char *s2, int len)
-{
- register int diff;
-
- assert (len == strlen (s2));
- assert (len <= strlen (s1));
-
- if (need_locale)
- {
- /* Emulate a strncoll function, by forcing strcoll to compare
- only the first LEN characters in each string. */
- register unsigned char n1 = s1[len];
-
- s1[len] = 0;
- diff = strcoll (s1, s2);
- s1[len] = n1;
- }
- else
- {
- diff = memcmp (s1, s2, len);
- }
-
- return diff;
-}
-
#endif /* NLS */
static void
@@ -586,45 +521,27 @@ inittables (void)
}
#if defined ENABLE_NLS && HAVE_NL_LANGINFO
- /* If We're not in the "C" locale, read in different names for months. */
+ /* If we're not in the "C" locale, read different names for months. */
if (need_locale)
{
- nls_months_collide[0] = 1; /* if an error, look again */
- for (i = 0; i < NLS_NUM_MONTHS; i++)
+ for (i = 0; i < MONTHS_PER_YEAR; i++)
{
char *s;
size_t s_len;
- int j;
+ size_t j;
+ char *name;
- s = (char *) nl_langinfo (ABMON_1 + us_monthtab[i].val - 1);
+ s = (char *) nl_langinfo (ABMON_1 + i);
s_len = strlen (s);
- nls_monthtab[i].name = (char *) xmalloc (s_len + 1);
- nls_monthtab[i].val = us_monthtab[i].val;
+ monthtab[i].name = name = (char *) xmalloc (s_len + 1);
+ monthtab[i].val = i + 1;
- /* Be careful: abreviated month names
- may be longer than the usual 3 characters. */
for (j = 0; j < s_len; j++)
- nls_monthtab[i].name[j] = fold_toupper[UCHAR (s[j])];
- nls_monthtab[i].name[j] = '\0';
-
- nls_months_collide[nls_monthtab[i].val] = 0;
- for (j = 0; j < NLS_NUM_MONTHS; ++j)
- {
- if (STREQ (nls_monthtab[i].name, us_monthtab[i].name))
- {
- /* There are indeed some month names in English which
- collide with the NLS name. */
- nls_months_collide[nls_monthtab[i].val] = 1;
- break;
- }
- }
+ name[j] = fold_toupper[UCHAR (s[j])];
+ name[j] = '\0';
}
- /* Now quicksort the month table (should be sorted already!).
- However, another locale doesn't rule out the possibility
- of a different order of month names. */
- qsort ((void *) nls_monthtab, NLS_NUM_MONTHS,
- sizeof (struct month), nls_sort_month_comp);
- monthtab = nls_monthtab;
+ qsort ((void *) monthtab, MONTHS_PER_YEAR,
+ sizeof (struct month), struct_month_cmp);
}
#endif /* NLS */
}
@@ -948,10 +865,6 @@ findlines (struct buffer *buf, struct lines *lines)
static int
fraccompare (register const char *a, register const char *b)
{
-#ifdef ENABLE_NLS
- nls_fraction_found = 1;
-#endif
-
if (*a == decimal_point && *b == decimal_point)
{
while (*++a == *++b)
@@ -986,160 +899,6 @@ fraccompare (register const char *a, register const char *b)
machine numbers. Comparatively slow for short strings, but asymptotically
hideously fast. */
-/* The code here, is like the above... continuous reoccurrance of the
- same code... improved 15-JAN-1997 in connection with native languages
- support */
-
-#ifdef ENABLE_NLS
-
-/* Decide the kind of fraction the program will use */
-static void
-nls_set_fraction (char ch)
-{
- if (!nls_fraction_found && ch != decimal_point)
- {
- if (ch == FLOATING_POINT)
- { /* US style */
- decimal_point = FLOATING_POINT;
- th_sep = FLOATING_COMMA;
- }
- else if (ch == FLOATING_COMMA)
- { /* EU style */
- decimal_point = FLOATING_COMMA;
- th_sep = FLOATING_POINT;
- }
- else if (ch != decimal_point)
- { /* Alien */
- decimal_point = ch;
- th_sep = CHAR_MAX + 1;
- }
- }
- nls_fraction_found = 1;
-}
-
-/* Look for a fraction
- It isn't as simple as it looks... however, consider a number:
- 1.234,00
- 1,234.00
- It's easy to tell which is a decimal point, and which isn't. We use
- the grouping information to find out how many digits are grouped together
- for thousand separator.
-
- The idea here, is to use the grouping information... but not to
- spend time with verifying the groups... not too much time, anyway.
- so, a number represented to us as:
- 1.234.567,89
- will be taken and separated into different groups, separated by a
- separator character (Decimal point or thousands separator).
- {1,234,567}
- these are the groups of digits that lead to a separator character,
- and with the trailing group is added:
- {1,234,567,89}
- resulting in 4 groups of numbers. If the resulting number of groups,
- are none, or just 1... this is not enough to decide anything about
- the decimal point. We need at least two for that. With two groups
- we have at least one separator. That separator can be a decimal
- point, or a thousands separator... if it is a thousands separator
- the number of digits in the last group, will comply with the first
- rule in the grouping rule for numeric values. i.e.
- |{89}| = grouping[0]
- if so, and there are only two groups of numbers, the value cannot
- be determined. If there are three or more numbers, the separator
- separating the groups is checked. If these are the same, the
- character is determined to be a thousands separator. If they are
- not the same, the last separator is determined to be a decimal
- point. If checking the grouping rules, we find out that there
- are no grouping rules defined, either the grouping rules is NULL
- or the first grouping number is 0, then the locale format is used.
-
- We try to take an advantage of a special situation. If the trailing
- group, the one that normally should be the fractional part, turns
- out to have the same length as the thousands separator rule says,
- making a doubt on that it may be a decimal point, we look for the
- group before that, i.e. with a two group form:
- {1234,567}
- where the grouping rule is 3;3... we take a look at group 1, and find
- out that |{1234}| > larger of the two first grouping rules, then
- the separator has to be a decimal point...
- */
-
-static void
-look_for_fraction (const char *s, const char *e)
-{
- register const char *p;
- register unsigned short n = 0;
- static unsigned short max_groups = 0;
- static unsigned short *groups = NULL;
-
- if (groups == NULL)
- {
- max_groups = NLS_MAX_GROUPS;
- groups = (unsigned short *) xmalloc (sizeof (*groups) * max_groups);
- }
-
- /* skip blanks and signs */
- while (blanks[UCHAR (*s)] || *s == NEGATION_SIGN)
- s++;
- /* groups = {}, n = 0 */
- for (p = s; p < e; p++)
- {
- /* groups[n]={number of digits leading to separator n}
- n = number of separators so far */
- if (*p == decimal_point || *p == th_sep || *p == FLOATING_POINT)
- {
- if (++n >= max_groups)
- {
- /* BIG Number... enlarge table */
- max_groups += NLS_MAX_GROUPS;
- groups = (unsigned short *) xrealloc ((char *) groups,
- (sizeof (*groups)
- * max_groups));
- }
- groups[n] = (unsigned short) (p - s);
- s = p + 1;
- }
- else if (!ISDIGIT (*p))
- break;
- /* mem[s..p]=digits only */
- }
- /* n = number of separators in s..e */
- groups[++n] = (short) (p - s);
- /* n = groups in the number */
- if (n <= 1)
- return; /* Only one group of numbers... not enough */
- p = nls_grouping;
- /* p = address of group rules
- s = address of next character after separator */
- s = s - 1; /* s = address of last separator */
- if (p && *p)
- {
- /* a legal trailing group, iff groups[n] == first rule */
- if (groups[n] != (short) *p)
- nls_set_fraction (*s);
- else
- {
- if (n == 2)
- { /* Only two groups */
- if (groups[n - 1] > max (p[0], p[1]))
- nls_set_fraction (*s);
- return;
- }
- /* if the separators are the same, it's a thousands */
- if (*s != *(s - groups[n]))
- nls_set_fraction (*s);
- /* s[0] = thousands separator */
- else if (*s == th_sep)
- nls_fraction_found = 1;
- }
- }
- else
- {
- /* no grouping allowed here, last separator IS decimal point */
- nls_set_fraction (*s);
- }
-}
-#endif
-
static int
numcompare (register const char *a, register const char *b)
{
@@ -1303,7 +1062,7 @@ static int
getmonth (const char *s, int len)
{
char *month;
- register int i, lo = 0, hi = 12, result;
+ register int i, lo = 0, hi = MONTHS_PER_YEAR, result;
while (len > 0 && blanks[UCHAR (*s)])
{
@@ -1325,38 +1084,19 @@ getmonth (const char *s, int len)
{
int ix = (lo + hi) / 2;
- len = strlen (monthtab[ix].name);
- if (NLS_STRNCMP (month, monthtab[ix].name, len) < 0)
+ if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
hi = ix;
else
lo = ix;
}
while (hi - lo > 1);
- result = (!strncmp (month, monthtab[lo].name, len) ? monthtab[lo].val : 0);
+ result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
+ ? monthtab[lo].val : 0);
return result;
}
-#ifdef ENABLE_NLS
-/* Look for the month in locale table, and if that fails try with
- us month name table */
-static int
-nls_month_is_either_locale (const char *s, int len)
-{
- int ind;
-
- monthtab = nls_monthtab;
- ind = getmonth (s, len);
- if (ind == 0)
- {
- monthtab = us_monthtab;
- ind = getmonth (s, len);
- }
- return ind;
-}
-#endif
-
/* Compare two lines A and B trying every key in sequence until there
are no more keys or a difference is found. */
@@ -1460,32 +1200,7 @@ keycompare (const struct line *a, const struct line *b)
}
else if (key->month)
{
-#ifdef ENABLE_NLS
-
- /* if we haven't decided which locale to go with, we get the
- month name from either. If either month name is fully
- solved and the month name doesn't collide with the other
- locale... then use that table from there forward */
- if (!nls_month_found)
- {
- int x;
-
- x = nls_month_is_either_locale (texta, lena);
- nls_month_found = !nls_months_collide[x];
- if (nls_month_found)
- {
- diff = x - getmonth (textb, lenb);
- }
- else
- {
- diff = nls_month_is_either_locale (textb, lenb);
- nls_month_found = !nls_months_collide[diff];
- diff = x - diff;
- }
- }
- else
-#endif
- diff = getmonth (texta, lena) - getmonth (textb, lenb);
+ diff = getmonth (texta, lena) - getmonth (textb, lenb);
if (diff)
return key->reverse ? -diff : diff;
continue;
@@ -1658,7 +1373,7 @@ compare (register const struct line *a, register const struct line *b)
diff = UCHAR (*ap) - UCHAR (*bp);
if (diff == 0)
{
- diff = NLS_MEMCMP (ap, bp, mini);
+ diff = memcmp (ap, bp, mini);
if (diff == 0)
diff = tmpa - tmpb;
}
@@ -1935,65 +1650,6 @@ mergefps (FILE **fps, register int nfps, FILE *ofp)
}
}
-#ifdef ENABLE_NLS
-
-/* Find the numeric format that this file represents to us for sorting. */
-static void
-nls_numeric_format (const struct line *line, int nlines)
-{
- struct nls_keyfield *n_key = nls_keyhead;
-
- /* line = first line, nlines = number of lines,
- nls_fraction_found = false */
- for (; !nls_fraction_found && nlines > 0; line++, nlines--)
- {
- int iter;
- for (iter = 0; !nls_fraction_found; iter++)
- {
- char *text;
- char *lim;
- struct keyfield *key = n_key->key;
-
- /* text = {}, lim = {}, key = first key */
- if (iter || line->keybeg == NULL)
- {
- /* Succeding keys, where the key field is
- specified */
- if (key->eword >= 0) /* key->eword = length of key */
- lim = limfield (line, key);
- else
- lim = line->text + line->length;
- /* lim = end of key field */
-
- if (key->sword >= 0) /* key->sword = start of key */
- text = begfield (line, key);
- else
- text = line->text;
- /* text = start of field */
- }
- else
- {
- /* First key is always the whole line */
- text = line->keybeg;
- lim = line->keylim;
- }
- /* text = start of text to sort
- lim = end of text to sort */
-
- look_for_fraction (text, lim);
-
- /* nls_fraction_found = decimal_point found? */
-
- if ((n_key = n_key->next) == nls_keyhead)
- break; /* No more keys for this line */
- }
- }
- nls_fraction_found = 1;
- /* decide on current decimal_point known */
-}
-
-#endif
-
/* Sort the array LINES with NLINES members, using TEMP for temporary space. */
static void
@@ -2132,12 +1788,6 @@ sort (char **files, int nfiles, FILE *ofp)
tmp = (struct line *)
xrealloc ((char *) tmp, ntmp * sizeof (struct line));
}
-#ifdef ENABLE_NLS
- if (nls_keyhead)
- nls_keyhead = nls_keyhead->next;
- if (!nls_fraction_found && nls_keyhead)
- nls_numeric_format (lines.lines, lines.used);
-#endif
sortlines (lines.lines, lines.used, tmp);
if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
{
@@ -2187,23 +1837,6 @@ insertkey (struct keyfield *key)
k = k->next;
k->next = key;
key->next = NULL;
-#ifdef ENABLE_NLS
- if (key->numeric || key->general_numeric)
- {
- struct nls_keyfield *nk;
-
- nk = (struct nls_keyfield *) xmalloc (sizeof (struct nls_keyfield));
- nk->key = key;
- if (nls_keyhead)
- {
- nk->next = nls_keyhead->next;
- nls_keyhead->next = nk;
- }
- else
- nk->next = nk;
- nls_keyhead = nk;
- }
-#endif
}
static void
@@ -2347,26 +1980,18 @@ main (int argc, char **argv)
struct lconv *lconvp = localeconv ();
/* If the locale doesn't define a decimal point, or if the decimal
- point is multibyte, use the US notation. We don't support
+ point is multibyte, use the C decimal point. We don't support
multibyte decimal points yet. */
decimal_point = *lconvp->decimal_point;
if (! decimal_point || lconvp->decimal_point[1])
- decimal_point = FLOATING_POINT;
- else
- nls_fraction_found = 0; /* Figure out which decimal point to use */
+ decimal_point = C_DECIMAL_POINT;
/* We don't support multibyte thousands separators yet. */
th_sep = *lconvp->thousands_sep;
if (! th_sep || lconvp->thousands_sep[1])
th_sep = CHAR_MAX + 1;
-
- nls_grouping = (char *) (lconvp->grouping);
}
- nls_month_found = 0; /* Figure out which month notation to use */
-
- monthtab = nls_monthtab;
-
#endif /* NLS */
bindtextdomain (PACKAGE, LOCALEDIR);