diff options
Diffstat (limited to 'src/sort.c')
-rw-r--r-- | src/sort.c | 676 |
1 files changed, 622 insertions, 54 deletions
diff --git a/src/sort.c b/src/sort.c index c094bc5fa..67b9a4541 100644 --- a/src/sort.c +++ b/src/sort.c @@ -19,6 +19,16 @@ The author may be reached (Email) at the address mike@gnu.ai.mit.edu, or (US mail) as Mike Haertel c/o Free Software Foundation. */ +/* NLS addition added 1997 by Ørn E. Hansen. + + Who can be reached at (e-mail) oehansen@daimi.aau.dk, + oe.hansen@halmstad.mail.telia.com + + The additions made to allow NLS for sorting, is free software + and can be freely distributed or modified, under the GNU general + public licence as published by the Free Software Foundation. */ + + #include <config.h> /* Get isblank from GNU libc. */ @@ -36,6 +46,13 @@ #include "error.h" #include "xstrtod.h" +#ifdef ENABLE_NLS +/* this may need some heading.... applies to Debian linux */ +/* for reading the structur of _NL_ITEM... to get abreviated month */ +/* names */ +#include <langinfo.h> +#endif /* NLS */ + #ifdef HAVE_LIMITS_H # include <limits.h> #else @@ -52,6 +69,8 @@ void free (); /* Undefine, to avoid warning about redefinition on some systems. */ #undef min #define min(a, b) ((a) < (b) ? (a) : (b)) +#undef max +#define max(a, b) ((a) > (b) ? (a) : (b)) #define UCHAR_LIM (UCHAR_MAX + 1) #define UCHAR(c) ((unsigned char) (c)) @@ -67,6 +86,52 @@ void free (); status code greater than 1. */ #define SORT_FAILURE 2 +/* Some character constants used in the program. Better do assign */ +/* these globally. Makes the program a little more readable. */ +static unsigned char decimal_point = '.'; +static unsigned char th_sep = ','; +static unsigned char *nls_grouping = "\003\003"; + +#define FLOATING_POINT '.' +#define FLOATING_COMMA ',' +#define NEGATIVE_SIGN '-' +#define NUMERIC_ZERO '0' + +#define CHARS_IN_ABM 3 + +static int need_locale = 0; /* This is "C" locale, need another? */ +static int nls_fraction_found = 1; /* Should we look for decimal point? */ +static int nls_month_found = 1; /* Look for month notations in text? 
*/ + +/* If native language support is requested, make a 1-1 map to the */ +/* locale character map, otherwise ensure normal behaviour */ +#ifdef ENABLE_NLS + +#define NLS_KEY_LIMIT 30 /* Keys have limited length */ +#define NLS_NUM_MONTHS 12 /* 12 months in a year */ +#define NLS_MAX_GROUPS 8 /* Maximum number of groups */ + +/* A string with one character, to enforce char collation */ +#define NLS_ONE_CHARACTER_STRING " " + +/* Two buffers, specificly used to get a one-one map of the table */ +/* used under inittables. */ +unsigned char *nls_temp_buf1, *nls_temp_buf2; + +/* Create a map, that maps the characters in the "C" locale */ +/* 1 - 1 to the locale view of character order */ +unsigned char nls_locale_map[UCHAR_LIM]; + +/* A definition to map each character through the above translation */ +/* table, during sort. */ +#define NLS_MAP(c) UCHAR(c) + +#else + +/* No NLS the character value itself, represents the sorting order */ +#define NLS_MAP(c) UCHAR(c) +#endif + /* The kind of blanks for '-b' to skip in various options. */ enum blanktype { bl_start, bl_end, bl_both }; @@ -143,7 +208,7 @@ static char fold_toupper[UCHAR_LIM]; /* Table mapping 3-letter month names to integers. Alphabetic order allows binary search. */ -static struct month const monthtab[] = +static struct month us_monthtab[] = { {"APR", 4}, {"AUG", 8}, @@ -159,6 +224,23 @@ static struct month const monthtab[] = {"SEP", 9} }; +#ifdef ENABLE_NLS + +/* Locale may have a different idea of month names */ +static struct month nls_monthtab[NLS_NUM_MONTHS]; +static int nls_months_collide[NLS_NUM_MONTHS+1]; + +/* Numeric keys, to search for numeric format */ +static struct nls_keyfield { + struct keyfield *key; + struct nls_keyfield *next; +} *nls_keyhead = NULL; + +#endif + +/* Which month table to use in the program, default C */ +static struct month *monthtab = us_monthtab; + /* During the merge phase, the number of files to merge at once. */ #define NMERGE 16 @@ -246,7 +328,7 @@ for that key. 
If no key given, use the entire line as key. With no\n\ FILE, or when FILE is -, read standard input.\n\ ") , DEFAULT_TMPDIR); - puts (_("\nReport bugs to <textutils-bugs@gnu.org>.")); + puts (_("\nReport bugs to textutils-bugs@gnu.ai.mit.edu")); } /* Don't use EXIT_FAILURE here in case it is defined to be 1. POSIX requires that sort return 1 IFF invoked with -c and @@ -445,8 +527,39 @@ zaptemp (char *name) } } +#ifdef ENABLE_NLS /* Initialize the character class tables. */ +static int nls_sort_month_comp(struct month *m1, struct month *m2) +{ + return strcoll(m1->name, m2->name); +} + +/* strncoll(a, b, l) */ +/* do collation on strings a and b, but for at most l characters */ +/* we use the fact, that we KNOW that l is the min of the two lengths */ +/* and we make use of the fact, that collation on chars has already */ +/* been done and is stored in NLS_MAP */ +static int strncoll(unsigned char *s1, unsigned char *s2, int l) +{ + register int diff = 0; + + if (need_locale) { + /* Let's emulate a strncoll() function, by forcing strcoll() */ + /* to compare only l characters in both strings. */ + register unsigned char n1=s1[l],n2=s2[l]; + + s1[l]=s2[l]=0; + diff = strcoll(s1, s2); + s1[l]=n1; + s2[l]=n2; + } else + diff = memcmp(s1, s2, l); + return diff; +} + +#endif /* NLS */ + static void inittables (void) { @@ -465,6 +578,33 @@ inittables (void) else fold_toupper[i] = i; } + +#ifdef ENABLE_NLS + /* If We're not in the "C" locale, we gotta read in different */ + /* names for months. 
*/ + if (need_locale) { + unsigned char *s; + int j; + int (*comp)() = nls_sort_month_comp; + + nls_months_collide[0] = 1; /* if an error, look again */ + for (i = 0; i < NLS_NUM_MONTHS; i++) { + s = nl_langinfo(_NL_ITEM(LC_TIME, ABMON_1+us_monthtab[i].val-1)); + nls_monthtab[i].name = strdup(s); + nls_monthtab[i].val = us_monthtab[i].val; + + /* It has been pointed out, that abreviated month names */ + /* may be longer than the usual 3 characters */ + for(j=0;j<strlen(s);j++) nls_monthtab[i].name[j] = fold_toupper[s[j]]; + nls_months_collide[nls_monthtab[i].val] = (strncmp(nls_monthtab[i].name, us_monthtab[i].name, CHARS_IN_ABM) == 0); + } + /* Now quicksort the month table (should be sorted already!) */ + /* However, another locale doesn't rule out the possibility */ + /* of a different order of month names. */ + qsort((void *)nls_monthtab, NLS_NUM_MONTHS, sizeof(struct month), comp); + monthtab = nls_monthtab; + } +#endif /* NLS */ } /* Initialize BUF, allocating ALLOC bytes initially. */ @@ -754,13 +894,86 @@ findlines (struct buffer *buf, struct lines *lines) should begin with a decimal point followed immediately by the digits of the fraction. Strings not of this form are considered to be zero. */ +/* The goal here, is to take two numbers a and b... compare these + in parallel. Instead of converting each, and then comparing the + outcome. Most likely stopping the comparison before the conversion + is complete. The algorithm used, in the old sort: + + Algorithm: fraccompare + Action : compare two decimal fractions + accepts : char *a, char *b + returns : -1 if a<b, 0 if a=b, 1 if a>b. + implement: + + if *a == decimal_point AND *b == decimal_point + find first character different in a and b. + if both are digits, return the difference *a - *b. 
+ if *a is a digit + skip past zeroes + if digit return 1, else 0 + if *b is a digit + skip past zeroes + if digit return -1, else 0 + if *a is a decimal_point + skip past decimal_point and zeroes + if digit return 1, else 0 + if *b is a decimal_point + skip past decimal_point and zeroes + if digit return -1, else 0 + return 0 + + As can be clearly seen, the above implementation duplicates code, + and thus there is place for improvement: + the difference in code of a and b, is solved by using a + refernce to s, assigned to either a or b. and using n + to denote return value. + the difference in either that start being a digit or + the decimal point, is solved by testing if either is + a decimal point, or if the other is a digit... + + if *a or *b is a decimal_point + skip all chars where *a == *b + if *a and *b are digits return *a - *b + s is b, and return code is -1 + if *a is a digit or *a is a decimal_pointm then s is a, return code 1 + skip decimal_point in s + skip zeroes in s + if *s is a digit, return n + return 0 */ + +#ifdef ENABLE_NLS + +static int fraccompare(register const char *a, register const char *b) +{ + register const char *s; + int n = -1; + + if (!nls_fraction_found) nls_fraction_found=1; + if (*a == decimal_point || *b == decimal_point) { + if (*a == *b) + do { + ++a, ++b; + } while (*a == *b && ISDIGIT(*a)); + if (ISDIGIT(*a) && ISDIGIT(*b)) + return (*a) - (*b); + s = b; + if (*a==decimal_point || (ISDIGIT(*a) && *b!=decimal_point)) + s = a, n=1; + if (*s == decimal_point) ++s; + while (*s == NUMERIC_ZERO) ++s; + if (ISDIGIT(*s)) return n; + } + return 0; +} + +#else static int fraccompare (register const char *a, register const char *b) { register int tmpa = *a; register int tmpb = *b; - if (tmpa == '.' 
&& tmpb == '.') + if (tmpa == decimal_point && tmpb == decimal_point) { do tmpa = *++a, tmpb = *++b; @@ -769,15 +982,15 @@ fraccompare (register const char *a, register const char *b) return tmpa - tmpb; if (ISDIGIT (tmpa)) { - while (tmpa == '0') + while (tmpa == NUMERIC_ZERO) tmpa = *++a; if (ISDIGIT (tmpa)) return 1; return 0; } - if (ISDIGIT (tmpb)) + if (digits[tmpb]) { - while (tmpb == '0') + while (tmpb == NUMERIC_ZERO) tmpb = *++b; if (ISDIGIT (tmpb)) return -1; @@ -785,31 +998,224 @@ fraccompare (register const char *a, register const char *b) } return 0; } - else if (tmpa == '.') + else if (tmpa == decimal_point) { do tmpa = *++a; - while (tmpa == '0'); + while (tmpa == NUMERIC_ZERO); if (ISDIGIT (tmpa)) return 1; return 0; } - else if (tmpb == '.') + else if (tmpb == decimal_point) { do tmpb = *++b; - while (tmpb == '0'); + while (tmpb == NUMERIC_ZERO); if (ISDIGIT (tmpb)) return -1; return 0; } return 0; } +#endif /* Compare strings A and B as numbers without explicitly converting them to machine numbers. Comparatively slow for short strings, but asymptotically hideously fast. */ +/* The code here, is like the above... continuous reoccurrance of the + same code... improved 15-JAN-1997 in connection with native languages + support */ + +#ifdef ENABLE_NLS + +/* Decide the kind of fraction the program will use */ +static int nls_set_fraction(register unsigned char ch) +{ + if (!nls_fraction_found && ch != decimal_point) + if (ch == FLOATING_POINT) { /* US style */ + decimal_point = FLOATING_POINT; + th_sep = FLOATING_COMMA; + } else if (ch == FLOATING_COMMA) { /* EU style */ + decimal_point = FLOATING_COMMA; + th_sep = FLOATING_POINT; + } else if (ch != decimal_point) { /* Alien */ + decimal_point = ch; + th_sep = '\0'; + } + return nls_fraction_found=1; +} + +/* Look for a fraction + It ain't as simple as it looks... however, consider a number: + 1.234,00 + 1,234.00 + It's easy to tell which is a decimal point, and which isn't. 
We use + the grouping information to find out how many digits are grouped together + by the thousands separator. + + The idea here is to use the grouping information... but not to + spend time verifying the groups... not too much time, anyway. + So, a number presented to us as: + 1.234.567,89 + will be taken and split into different groups, separated by a + separator character (decimal point or thousands separator). + {1,234,567} + These are the groups of digits that lead up to a separator character, + and then the trailing group is added: + {1,234,567,89} + resulting in 4 groups of digits. If the resulting number of groups + is zero, or just 1... this is not enough to decide anything about + the decimal point. We need at least two for that. With two groups + we have at least one separator. That separator can be a decimal + point, or a thousands separator... if it is a thousands separator + the number of digits in the last group will comply with the first + rule in the grouping rules for numeric values, i.e. + |{89}| = grouping[0] + If so, and there are only two groups, the kind of separator cannot + be determined. If there are three or more groups, the separators + separating the groups are checked. If these are the same, the + character is determined to be a thousands separator. If they are + not the same, the last separator is determined to be a decimal + point. If, when checking the grouping rules, we find out that there + are no grouping rules defined (either the grouping rules pointer is NULL + or the first grouping number is 0), then the locale format is used. + + We try to take advantage of a special situation. If the trailing + group, the one that normally should be the fractional part, turns + out to have the same length as the thousands separator rule says, + casting doubt on whether it follows a decimal point, we look at the + group before that, i.e. with a two-group form: + {1234,567} + where the grouping rule is 3;3... 
we take a look at group 1, and find + out that |{1234}| > larger of the two first grouping rules, then + the seperator has to be a decimal point... + */ + +static int look_for_fraction(unsigned char *s, unsigned char *e) +{ + /* I don't think it's reasonable to think of more than 6 groups */ + register unsigned char *p=s, n=0; + unsigned short groups[NLS_MAX_GROUPS]; + + /* skip blanks and signs */ + while(blanks[*s] || *s == NEGATIVE_SIGN) s++; + /* groups = {}, n = 0 */ + for(;p < e;p++) { + /* groups[n]={number of digits leading to seperator n} + n = number of seperators so far */ + if (*p == decimal_point || *p == th_sep || *p == FLOATING_POINT) { + if (++n >= NLS_MAX_GROUPS) return; /* WOW! BIG Number... */ + groups[n] = (short)(p - s), s=p+1; + } else if (!ISDIGIT(*p)) break; + /* mem[s..p]=digits only */ + } + /* n = number of seperators in s..e */ + groups[++n]=(short)(p - s); + /* n = groups in the number */ + if (n <= 1) return 0; /* Only one group of numbers... not enough */ + p = nls_grouping; + /* p = address of group rules + s = address of next character after seperator */ + s = s - 1; /* s = address of last seperator */ + if (p && *p) { + /* a legal trailing group, iff groups[n] == first rule */ + if (groups[n] != (short)*p) return nls_set_fraction(*s); + if (n == 2) { /* Only two groups */ + if (groups[n-1] > max(p[0],p[1])) + return nls_set_fraction(*s); + return 0; + } + /* if the seperators are the same, it's a thousands */ + if (*s != *(s - groups[n])) + return nls_set_fraction(*s); + /* s[0] = thousands seperator */ + if (*s == FLOATING_COMMA) + return nls_set_fraction(FLOATING_POINT); + return nls_fraction_found=1; + } else { /* no grouping allowed here, last seperator IS decimal point */ + return nls_set_fraction(*s); + } + return 0; +} + +static int +numcompare (register const unsigned char *a, register const unsigned char *b) +{ + int ret_code = 1; /* normal return status, see later in code */ + int diff = 0; /* difference between two 
digits */ + + while (blanks[*a]) ++a; + while (blanks[*b]) ++b; + + /* next character in a,b is non-blank */ + if ((*a == NEGATIVE_SIGN || *b == NEGATIVE_SIGN) && *a != *b) { + /* a < 0, or b < 0, but not both */ + if (*a == NEGATIVE_SIGN) ret_code = -1, ++a; /* a looks < b */ + else if (*b == NEGATIVE_SIGN) ret_code = 1, ++b; /* b looks < a */ + /* bypass zeroes, decimal points, and thousand sep in a & b */ + while (*a == NUMERIC_ZERO ||(th_sep && *a == th_sep)|| *a == decimal_point) + ++a; + while (*b == NUMERIC_ZERO ||(th_sep && *b == th_sep)|| *b == decimal_point) + ++b; + if (ISDIGIT(*a) || ISDIGIT(*b)) + /* here, either a or b or both are digits + if a and b are digits, the signed one is the lesser. + if a is a digit, and not b.. it means b==0, and if b==0 + than either is signed if b is signed then -0 < a + or if a is signed then -a < 0. The ret_code is already set + to mark that the signed number is the lesser, so we just + return that number here. */ + return ret_code; + + /* *a and *b are neither digits, they are equal -0 == +0 */ + return 0; + } else { + /* either both numbers are signed, or both are not-signed */ + if (*a == NEGATIVE_SIGN) ++a, ++b, ret_code=-1; + /* if both are signed, then remember -100 < -10 (ret_code reversed!) */ + + /* Skip any leading zeroes */ + while (*a == NUMERIC_ZERO) ++a; + while (*b == NUMERIC_ZERO) ++b; + +continue_thousands: + + /* skip all equal digits */ + while (ISDIGIT(*a) && ISDIGIT(*b) && *a == *b) + a++, b++; + + /* Here, we have either different digits, or possible fractions + or thousand seperators. */ + + if (ISDIGIT(*a) && ISDIGIT(*b)) { + if (diff == 0) + diff = ((*a) - (*b)); /* simple, isn't it? not quite */ + a++, b++; + goto continue_thousands; + } + + /* now, here either may be a fraction, or a thousand seperator... + or both. 
*/ + /* We've decided what are decimal_points, and what are thousands sep */ + if ((th_sep != 0) && (*a == th_sep || *b == th_sep)) { + if (*a == th_sep) ++a; + if (*b == th_sep) ++b; + goto continue_thousands; /* Ugly, but better than a while(1) */ + } + + if (ISDIGIT(*a)) return ret_code * 1; /* a has more digits than b */ + if (ISDIGIT(*b)) return ret_code * -1; /* b has more digits than a */ + + /* now, we should have the fractions solved */ + if ((diff == 0) && (*a == decimal_point || *b == decimal_point)) + return ret_code * fraccompare(a, b); + + return diff; /* fall through here, and diff decides */ + } +} +#else static int numcompare (register const char *a, register const char *b) { @@ -823,48 +1229,47 @@ numcompare (register const char *a, register const char *b) while (blanks[tmpb]) tmpb = UCHAR (*++b); - if (tmpa == '-') + if (tmpa == NEGATIVE_SIGN) { do - tmpa = *++a; - while (tmpa == '0'); - if (tmpb != '-') + tmpa = UCHAR (*++a); + while (tmpa == NUMERIC_ZERO); + if (tmpb != NEGATIVE_SIGN) { - if (tmpa == '.') + if (tmpa == decimal_point) do tmpa = *++a; - while (tmpa == '0'); + while (tmpa == NUMERIC_ZERO); if (ISDIGIT (tmpa)) return -1; - while (tmpb == '0') - tmpb = *++b; - if (tmpb == '.') + while (tmpb == NUMERIC_ZERO) + tmpb = UCHAR (*++b); + if (tmpb == decimal_point) do tmpb = *++b; - while (tmpb == '0'); + while (tmpb == NUMERIC_ZERO); if (ISDIGIT (tmpb)) return -1; return 0; } do - tmpb = *++b; - while (tmpb == '0'); + tmpb = UCHAR (*++b); + while (tmpb == NUMERIC_ZERO); - while (tmpa == tmpb && ISDIGIT (tmpa)) - tmpa = *++a, tmpb = *++b; + while (tmpa == tmpb && digits[tmpa]) + tmpa = UCHAR (*++a), tmpb = UCHAR (*++b); - if ((tmpa == '.' && !ISDIGIT (tmpb)) - || (tmpb == '.' 
&& !ISDIGIT (tmpa))) + if ((tmpa == decimal_point && !ISDIGIT (tmpb)) || (tmpb == decimal_point && !ISDIGIT (tmpa))) return -fraccompare (a, b); if (ISDIGIT (tmpa)) - for (loga = 1; ISDIGIT (*++a); ++loga) + for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga) ; else loga = 0; if (ISDIGIT (tmpb)) - for (logb = 1; ISDIGIT (*++b); ++logb) + for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb) ; else logb = 0; @@ -877,49 +1282,48 @@ numcompare (register const char *a, register const char *b) return tmpb - tmpa; } - else if (tmpb == '-') + else if (tmpb == NEGATIVE_SIGN) { do - tmpb = *++b; - while (tmpb == '0'); - if (tmpb == '.') + tmpb = UCHAR (*++b); + while (tmpb == NUMERIC_ZERO); + if (tmpb == decimal_point) do tmpb = *++b; - while (tmpb == '0'); + while (tmpb == NUMERIC_ZERO); if (ISDIGIT (tmpb)) return 1; - while (tmpa == '0') - tmpa = *++a; - if (tmpa == '.') + while (tmpa == NUMERIC_ZERO) + tmpa = UCHAR (*++a); + if (tmpa == decimal_point) do - tmpa = *++a; - while (tmpa == '0'); + tmpa = UCHAR (*++a); + while (tmpa == NUMERIC_ZERO); if (ISDIGIT (tmpa)) return 1; return 0; } else { - while (tmpa == '0') - tmpa = *++a; - while (tmpb == '0') - tmpb = *++b; + while (tmpa == NUMERIC_ZERO) + tmpa = UCHAR (*++a); + while (tmpb == NUMERIC_ZERO) + tmpb = UCHAR (*++b); while (tmpa == tmpb && ISDIGIT (tmpa)) - tmpa = *++a, tmpb = *++b; + tmpa = UCHAR (*++a), tmpb = UCHAR (*++b); - if ((tmpa == '.' && !ISDIGIT (tmpb)) - || (tmpb == '.' 
&& !ISDIGIT (tmpa))) + if ((tmpa == decimal_point && !ISDIGIT (tmpb)) || (tmpb == decimal_point && !ISDIGIT (tmpa))) return fraccompare (a, b); if (ISDIGIT (tmpa)) - for (loga = 1; ISDIGIT (*++a); ++loga) + for (loga = 1; ISDIGIT (UCHAR (*++a)); ++loga) ; else loga = 0; if (ISDIGIT (tmpb)) - for (logb = 1; ISDIGIT (*++b); ++logb) + for (logb = 1; ISDIGIT (UCHAR (*++b)); ++logb) ; else logb = 0; @@ -933,6 +1337,7 @@ numcompare (register const char *a, register const char *b) return tmpa - tmpb; } } +#endif static int general_numcompare (const char *sa, const char *sb) @@ -967,20 +1372,42 @@ getmonth (const char *s, int len) if (len < 3) return 0; - for (i = 0; i < 3; ++i) + for (i = 0; i < CHARS_IN_ABM; ++i) month[i] = fold_toupper[UCHAR (s[i])]; month[3] = '\0'; - while (hi - lo > 1) + while (hi - lo > 1) { +#ifdef ENABLE_NLS + if (strcoll (month, monthtab[(lo+hi)/2].name) < 0) +#else if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0) +#endif hi = (lo + hi) / 2; else lo = (lo + hi) / 2; + } if (!strcmp (month, monthtab[lo].name)) return monthtab[lo].val; return 0; } +#ifdef ENABLE_NLS +/* Look for the month in locale table, and if that fails try with + us month name table */ +static int nls_month_is_either_locale(const char *s, int len) +{ + int ind; + + monthtab = nls_monthtab; + ind = getmonth(s, len); + if (ind == 0) { + monthtab = us_monthtab; + ind = getmonth(s, len); + } + return ind; +} +#endif + /* Compare two lines A and B trying every key in sequence until there are no more keys or a difference is found. */ @@ -1082,11 +1509,60 @@ keycompare (const struct line *a, const struct line *b) } else if (key->month) { +#ifdef ENABLE_NLS + + /* if we haven't decided which locale to go with, we get the + month name from either. If either month name is fully + solved and the month name doesn't collide with the other + locale... 
then use that table from there forward */ + if (!nls_month_found) { + int x; + + x = nls_month_is_either_locale(texta, lena); + if (nls_month_found = !nls_months_collide[x]) + diff = x - getmonth(textb, lenb); + else { + diff = nls_month_is_either_locale(textb, lenb); + nls_month_found = !nls_months_collide[diff]; + diff = x - diff; + } + } else +#endif diff = getmonth (texta, lena) - getmonth (textb, lenb); if (diff) return key->reverse ? -diff : diff; continue; } +#ifdef ENABLE_NLS + + /* This sorting may become slow, so in a simple locale */ + /* The user can select a faster sort, that is similar */ + /* to ascii sort, but 8-bit instead of 7-bit. But */ + /* can't handle more complex, combined, character sets */ + else if (need_locale) { + unsigned char copy_a[lena+1], copy_b[lenb+1]; + int la, lb, i; + + /* we can't just go strcoll() the two strings, but */ + /* must extract the text for the key, and do the */ + /* proper 'ignore' and 'translate' before comparing */ + for(la=lb=i=0;i<max(lena,lenb);i++) { + if (i < lena) { + copy_a[la]=translate?translate[UCHAR(texta[i])]:texta[i]; + la = ignore?(ignore[UCHAR(texta[i])]?la:la+1):la+1; + } + if (i < lenb) { + copy_b[lb]=translate?translate[UCHAR(textb[i])]:textb[i]; + lb = ignore?(ignore[UCHAR(textb[i])]?lb:lb+1):lb+1; + } + } + copy_a[la]=copy_b[lb]=0; + diff = strcoll(copy_a, copy_b); + if (diff) + return key->reverse? 
-diff:diff; + continue; + } +#endif else if (ignore && translate) #define CMP_WITH_IGNORE(A, B) \ @@ -1102,7 +1578,7 @@ keycompare (const struct line *a, const struct line *b) { \ if ((A) != (B)) \ { \ - diff = (A) - (B); \ + diff = NLS_MAP(A) - NLS_MAP(B); \ break; \ } \ ++texta; \ @@ -1144,13 +1620,21 @@ keycompare (const struct line *a, const struct line *b) { if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)]) { - diff = (translate[UCHAR (*--texta)] - - translate[UCHAR (*--textb)]); + diff = (NLS_MAP(translate[UCHAR (*--texta)]) + - NLS_MAP(translate[UCHAR (*--textb)])); break; } } else +#ifndef ENABLE_NLS diff = memcmp (texta, textb, min (lena, lenb)); +#else + /* since we don't have a strncoll, should one be emulated? */ + /* as the normal behaviour of the sort program, when two */ + /* equivalent keys are met, is to sort according to length */ + + diff = strncoll (texta, textb, min(lena, lenb)); +#endif if (diff) return key->reverse ? -diff : diff; @@ -1191,10 +1675,18 @@ compare (register const struct line *a, register const struct line *b) { char *ap = a->text, *bp = b->text; - diff = UCHAR (*ap) - UCHAR (*bp); +#ifdef ENABLE_NLS + if (need_locale) /* want absolutely correct sorting */ + return reverse ? -strcoll(ap, bp) : strcoll(ap, bp); +#endif + diff = NLS_MAP (*ap) - NLS_MAP (*bp); if (diff == 0) { +#ifdef ENABLE_NLS + diff = strncoll (ap, bp, mini); +#else diff = memcmp (ap, bp, mini); +#endif if (diff == 0) diff = tmpa - tmpb; } @@ -1469,6 +1961,41 @@ mergefps (FILE **fps, register int nfps, FILE *ofp) } } +#ifdef ENABLE_NLS + +/* + * Let's go into a frenzy and find the numeric format that this file + * represents to us for sorting. 
+ */ +nls_numeric_format(const struct line *line, int nlines) +{ + struct keyfield *key; + struct nls_keyfield *n_key = nls_keyhead; + int iter = 0; + unsigned char *text, *lim; + + for(;!nls_fraction_found && nlines>0;line++,nlines--) + for(iter=0;!nls_fraction_found;++iter) { + key = n_key->key; + if (iter || line->keybeg == NULL) { + if (key->eword >= 0) + lim = limfield(line, key); + else + lim = line->text + line->length; + if (key->sword >= 0) + text = begfield(line, key); + else + text = line->text; + } else + text = line->keybeg, lim = line->keylim; + look_for_fraction(text, lim); + if ((n_key = n_key->next) == nls_keyhead) break; + } + return nls_fraction_found=1; +} + +#endif + /* Sort the array LINES with NLINES members, using TEMP for temporary space. */ static void @@ -1603,6 +2130,12 @@ sort (char **files, int nfiles, FILE *ofp) tmp = (struct line *) xrealloc ((char *) tmp, ntmp * sizeof (struct line)); } +#ifdef ENABLE_NLS + if (nls_keyhead) + nls_keyhead = nls_keyhead->next; + if (!nls_fraction_found && nls_keyhead) + nls_numeric_format(lines.lines, lines.used); +#endif sortlines (lines.lines, lines.used, tmp); if (feof (fp) && !nfiles && !n_temp_files && !buf.left) tfp = ofp; @@ -1650,6 +2183,18 @@ insertkey (struct keyfield *key) k = k->next; k->next = key; key->next = NULL; + if (key->numeric || key->general_numeric) { + struct nls_keyfield *nk; + + nk = (struct nls_keyfield *)xmalloc(sizeof(struct nls_keyfield)); + nk->key = key; + if (nls_keyhead) { + nk->next = nls_keyhead->next; + nls_keyhead->next = nk; + } else + nk->next = nk; + nls_keyhead = nk; + } } static void @@ -1746,7 +2291,30 @@ main (int argc, char **argv) #endif /* SA_INTERRUPT */ program_name = argv[0]; - setlocale (LC_ALL, ""); + +#ifdef ENABLE_NLS + + s = setlocale(LC_ALL, ""); + if (strcmp(s, "C") && strcmp(s, "POSIX")) + need_locale = 1; /* Neither C nor POSIX, we need to initialize it */ + + /* Let's get locale's representation of the decimal point */ + decimal_point = *( 
localeconv() )->decimal_point; + th_sep = *( localeconv() )->thousands_sep; + nls_grouping = ( localeconv() )->grouping; + + /* if locale doesn't define a decimal point, we'll use the + US notation. */ + if (decimal_point == 0) + decimal_point = FLOATING_POINT; + else + nls_fraction_found = 0; /* Figure out which decimal point to use */ + nls_month_found = 0; /* Figure out which month notation to use */ + + monthtab = nls_monthtab; + +#endif /* NLS */ + bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); |