summaryrefslogtreecommitdiff
path: root/usr.bin/sort/coll.c
diff options
context:
space:
mode:
authorTodd C. Miller <millert@cvs.openbsd.org>2015-03-17 17:45:14 +0000
committerTodd C. Miller <millert@cvs.openbsd.org>2015-03-17 17:45:14 +0000
commit8198c038aec798ab3af5869cb13307102659a4c7 (patch)
tree9140e3275590f14aefcd4285e5e7e13a20056075 /usr.bin/sort/coll.c
parent8f1f3bebdd8a5303bb242a3d4f83f99378719b32 (diff)
Initial import of FreeBSD sort.
Diffstat (limited to 'usr.bin/sort/coll.c')
-rw-r--r--usr.bin/sort/coll.c1288
1 files changed, 1288 insertions, 0 deletions
diff --git a/usr.bin/sort/coll.c b/usr.bin/sort/coll.c
new file mode 100644
index 00000000000..a5badf5c9eb
--- /dev/null
+++ b/usr.bin/sort/coll.c
@@ -0,0 +1,1288 @@
+/* $OpenBSD: coll.c,v 1.1 2015/03/17 17:45:13 millert Exp $ */
+
+/*-
+ * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
+ * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/types.h>
+
+#include <errno.h>
+#include <err.h>
+#include <langinfo.h>
+#include <limits.h>
+#include <math.h>
+#include <md5.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "coll.h"
+#include "vsort.h"
+
+struct key_specs *keys;
+size_t keys_num = 0;
+
+wint_t symbol_decimal_point = L'.';
+/* there is no default thousands separator in collate rules: */
+wint_t symbol_thousands_sep = 0;
+wint_t symbol_negative_sign = L'-';
+wint_t symbol_positive_sign = L'+';
+
+static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
+static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
+static int monthcoll(struct key_value*, struct key_value *, size_t offset);
+static int numcoll(struct key_value*, struct key_value *, size_t offset);
+static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
+static int randomcoll(struct key_value*, struct key_value *, size_t offset);
+static int versioncoll(struct key_value*, struct key_value *, size_t offset);
+
+/*
+ * Allocate keys array
+ */
+struct keys_array *
+keys_array_alloc(void)
+{
+ struct keys_array *ka;
+ size_t sz;
+
+ sz = keys_array_size();
+ ka = sort_malloc(sz);
+ memset(ka, 0, sz);
+
+ return ka;
+}
+
+/*
+ * Calculate whether we need key hint space
+ */
+static size_t
+key_hint_size(void)
+{
+
+ return need_hint ? sizeof(struct key_hint) : 0;
+}
+
+/*
+ * Calculate keys array size
+ */
+size_t
+keys_array_size(void)
+{
+
+ return keys_num * (sizeof(struct key_value) + key_hint_size());
+}
+
+/*
+ * Clean data of keys array
+ */
+void
+clean_keys_array(const struct bwstring *s, struct keys_array *ka)
+{
+
+ if (ka) {
+ size_t i;
+
+ for (i = 0; i < keys_num; ++i)
+ if (ka->key[i].k && ka->key[i].k != s)
+ bwsfree(ka->key[i].k);
+ memset(ka, 0, keys_array_size());
+ }
+}
+
+/*
+ * Set value of a key in the keys set
+ */
+void
+set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
+{
+
+ if (ka && keys_num > ind) {
+ struct key_value *kv;
+
+ kv = &(ka->key[ind]);
+
+ if (kv->k && kv->k != s)
+ bwsfree(kv->k);
+ kv->k = s;
+ }
+}
+
+/*
+ * Initialize a sort list item
+ */
+struct sort_list_item *
+sort_list_item_alloc(void)
+{
+ struct sort_list_item *si;
+ size_t sz;
+
+ sz = sizeof(struct sort_list_item) + keys_array_size();
+ si = sort_malloc(sz);
+ memset(si, 0, sz);
+
+ return si;
+}
+
+size_t
+sort_list_item_size(struct sort_list_item *si)
+{
+ size_t i, ret = 0;
+
+ if (si) {
+ ret = sizeof(struct sort_list_item) + keys_array_size();
+ if (si->str)
+ ret += bws_memsize(si->str);
+ for (i = 0; i < keys_num; ++i) {
+ struct key_value *kv;
+
+ kv = &(si->ka.key[i]);
+
+ if (kv->k != si->str)
+ ret += bws_memsize(kv->k);
+ }
+ }
+ return ret;
+}
+
+/*
+ * Calculate key for a sort list item
+ */
+static void
+sort_list_item_make_key(struct sort_list_item *si)
+{
+
+ preproc(si->str, &(si->ka));
+}
+
+/*
+ * Set value of a sort list item.
+ * Return combined string and keys memory size.
+ */
+void
+sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
+{
+
+ if (si) {
+ clean_keys_array(si->str, &(si->ka));
+ if (si->str) {
+ if (si->str == str) {
+ /* we are trying to reset the same string */
+ return;
+ } else {
+ bwsfree(si->str);
+ si->str = NULL;
+ }
+ }
+ si->str = str;
+ sort_list_item_make_key(si);
+ }
+}
+
+/*
+ * De-allocate a sort list item object memory
+ */
+void
+sort_list_item_clean(struct sort_list_item *si)
+{
+
+ if (si) {
+ clean_keys_array(si->str, &(si->ka));
+ if (si->str) {
+ bwsfree(si->str);
+ si->str = NULL;
+ }
+ }
+}
+
+/*
+ * Skip columns according to specs
+ */
+static size_t
+skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
+ bool skip_blanks, bool *empty_key)
+{
+ if (cols < 1)
+ return BWSLEN(s) + 1;
+
+ if (skip_blanks)
+ while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
+ ++start;
+
+ while (start < BWSLEN(s) && cols > 1) {
+ --cols;
+ ++start;
+ }
+
+ if (start >= BWSLEN(s))
+ *empty_key = true;
+
+ return start;
+}
+
+/*
+ * Skip fields according to specs
+ */
+static size_t
+skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
+{
+
+ if (fields < 2) {
+ if (BWSLEN(s) == 0)
+ *empty_field = true;
+ return 0;
+ } else if (!(sort_opts_vals.tflag)) {
+ size_t cpos = 0;
+ bool pb = true;
+
+ while (cpos < BWSLEN(s)) {
+ bool isblank;
+
+ isblank = iswblank(BWS_GET(s, cpos));
+
+ if (isblank && !pb) {
+ --fields;
+ if (fields <= 1)
+ return cpos;
+ }
+ pb = isblank;
+ ++cpos;
+ }
+ if (fields > 1)
+ *empty_field = true;
+ return cpos;
+ } else {
+ size_t cpos = 0;
+
+ while (cpos < BWSLEN(s)) {
+ if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
+ --fields;
+ if (fields <= 1)
+ return cpos + 1;
+ }
+ ++cpos;
+ }
+ if (fields > 1)
+ *empty_field = true;
+ return cpos;
+ }
+}
+
+/*
+ * Find fields start
+ */
+static void
+find_field_start(const struct bwstring *s, struct key_specs *ks,
+ size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
+{
+
+ *field_start = skip_fields_to_start(s, ks->f1, empty_field);
+ if (!*empty_field)
+ *key_start = skip_cols_to_start(s, ks->c1, *field_start,
+ ks->pos1b, empty_key);
+ else
+ *empty_key = true;
+}
+
+/*
+ * Find end key position
+ */
+static size_t
+find_field_end(const struct bwstring *s, struct key_specs *ks)
+{
+ size_t f2, next_field_start, pos_end;
+ bool empty_field, empty_key;
+
+ empty_field = false;
+ empty_key = false;
+ f2 = ks->f2;
+
+ if (f2 == 0)
+ return BWSLEN(s) + 1;
+ else {
+ if (ks->c2 == 0) {
+ next_field_start = skip_fields_to_start(s, f2 + 1,
+ &empty_field);
+ if ((next_field_start > 0) && sort_opts_vals.tflag &&
+ ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
+ next_field_start - 1)))
+ --next_field_start;
+ } else
+ next_field_start = skip_fields_to_start(s, f2,
+ &empty_field);
+ }
+
+ if (empty_field || (next_field_start >= BWSLEN(s)))
+ return BWSLEN(s) + 1;
+
+ if (ks->c2) {
+ pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
+ ks->pos2b, &empty_key);
+ if (pos_end < BWSLEN(s))
+ ++pos_end;
+ } else
+ pos_end = next_field_start;
+
+ return pos_end;
+}
+
+/*
+ * Cut a field according to the key specs
+ */
+static struct bwstring *
+cut_field(const struct bwstring *s, struct key_specs *ks)
+{
+ struct bwstring *ret = NULL;
+
+ if (s && ks) {
+ size_t field_start, key_end, key_start, sz;
+ bool empty_field, empty_key;
+
+ field_start = 0;
+ key_start = 0;
+ empty_field = false;
+ empty_key = false;
+
+ find_field_start(s, ks, &field_start, &key_start,
+ &empty_field, &empty_key);
+
+ if (empty_key)
+ sz = 0;
+ else {
+ key_end = find_field_end(s, ks);
+ sz = (key_end < key_start) ? 0 : (key_end - key_start);
+ }
+
+ ret = bwsalloc(sz);
+ if (sz)
+ bwsnocpy(ret, s, key_start, sz);
+ } else
+ ret = bwsalloc(0);
+
+ return ret;
+}
+
+/*
+ * Preprocesses a line applying the necessary transformations
+ * specified by command line options and returns the preprocessed
+ * string, which can be used to compare.
+ */
+int
+preproc(struct bwstring *s, struct keys_array *ka)
+{
+
+ if (sort_opts_vals.kflag) {
+ size_t i;
+ for (i = 0; i < keys_num; i++) {
+ struct bwstring *key;
+ struct key_specs *kspecs;
+ struct sort_mods *sm;
+
+ kspecs = &(keys[i]);
+ key = cut_field(s, kspecs);
+
+ sm = &(kspecs->sm);
+ if (sm->dflag)
+ key = dictionary_order(key);
+ else if (sm->iflag)
+ key = ignore_nonprinting(key);
+ if (sm->fflag || sm->Mflag)
+ key = ignore_case(key);
+
+ set_key_on_keys_array(ka, key, i);
+ }
+ } else {
+ struct bwstring *ret = NULL;
+ struct sort_mods *sm = default_sort_mods;
+
+ if (sm->bflag) {
+ if (ret == NULL)
+ ret = bwsdup(s);
+ ret = ignore_leading_blanks(ret);
+ }
+ if (sm->dflag) {
+ if (ret == NULL)
+ ret = bwsdup(s);
+ ret = dictionary_order(ret);
+ } else if (sm->iflag) {
+ if (ret == NULL)
+ ret = bwsdup(s);
+ ret = ignore_nonprinting(ret);
+ }
+ if (sm->fflag || sm->Mflag) {
+ if (ret == NULL)
+ ret = bwsdup(s);
+ ret = ignore_case(ret);
+ }
+ if (ret == NULL)
+ set_key_on_keys_array(ka, s, 0);
+ else
+ set_key_on_keys_array(ka, ret, 0);
+ }
+
+ return 0;
+}
+
+cmpcoll_t
+get_sort_func(struct sort_mods *sm)
+{
+
+ if (sm->nflag)
+ return numcoll;
+ else if (sm->hflag)
+ return hnumcoll;
+ else if (sm->gflag)
+ return gnumcoll;
+ else if (sm->Mflag)
+ return monthcoll;
+ else if (sm->Rflag)
+ return randomcoll;
+ else if (sm->Vflag)
+ return versioncoll;
+ else
+ return wstrcoll;
+}
+
+/*
+ * Compares the given strings. Returns a positive number if
+ * the first precedes the second, a negative number if the second is
+ * the preceding one, and zero if they are equal. This function calls
+ * the underlying collate functions, which done the actual comparison.
+ */
+int
+key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
+{
+ struct sort_mods *sm;
+ int res = 0;
+ size_t i;
+
+ for (i = 0; i < keys_num; ++i) {
+ sm = &(keys[i].sm);
+
+ if (sm->rflag)
+ res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
+ else
+ res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
+
+ if (res)
+ break;
+
+ /* offset applies to only the first key */
+ offset = 0;
+ }
+ return res;
+}
+
+/*
+ * Compare two strings.
+ * Plain symbol-by-symbol comparison.
+ */
+int
+top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
+{
+
+ if (default_sort_mods->rflag) {
+ const struct bwstring *tmp;
+
+ tmp = s1;
+ s1 = s2;
+ s2 = tmp;
+ }
+
+ return bwscoll(s1, s2, 0);
+}
+
+/*
+ * Compare a string and a sort list item, according to the sort specs.
+ */
+int
+str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
+{
+ struct keys_array *ka1;
+ int ret = 0;
+
+ ka1 = keys_array_alloc();
+
+ preproc(str1, ka1);
+
+ sort_list_item_make_key(*ss2);
+
+ if (debug_sort) {
+ bwsprintf(stdout, str1, "; s1=<", ">");
+ bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
+ }
+
+ ret = key_coll(ka1, &((*ss2)->ka), 0);
+
+ if (debug_sort)
+ printf("; cmp1=%d", ret);
+
+ clean_keys_array(str1, ka1);
+ sort_free(ka1);
+
+ if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
+ ret = top_level_str_coll(str1, ((*ss2)->str));
+ if (debug_sort)
+ printf("; cmp2=%d", ret);
+ }
+
+ if (debug_sort)
+ putchar('\n');
+
+ return ret;
+}
+
+/*
+ * Compare two sort list items, according to the sort specs.
+ */
+int
+list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
+ size_t offset)
+{
+ int ret;
+
+ ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
+
+ if (debug_sort) {
+ if (offset)
+ printf("; offset=%d", (int) offset);
+ bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
+ bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
+ printf("; cmp1=%d\n", ret);
+ }
+
+ if (ret)
+ return ret;
+
+ if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
+ ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
+ if (debug_sort)
+ printf("; cmp2=%d\n", ret);
+ }
+
+ return ret;
+}
+
+/*
+ * Compare two sort list items, according to the sort specs.
+ */
+int
+list_coll(const void *ss1, const void *ss2)
+{
+
+ return list_coll_offset((struct sort_list_item **)ss1,
+ (struct sort_list_item **)ss2, 0);
+}
+
+#define LSCDEF(N) \
+static int \
+list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
+{ \
+ \
+ return list_coll_offset(ss1, ss2, N); \
+}
+
+LSCDEF(0)
+LSCDEF(1)
+LSCDEF(2)
+LSCDEF(3)
+LSCDEF(4)
+LSCDEF(5)
+LSCDEF(6)
+LSCDEF(7)
+LSCDEF(8)
+LSCDEF(9)
+LSCDEF(10)
+LSCDEF(11)
+LSCDEF(12)
+LSCDEF(13)
+LSCDEF(14)
+LSCDEF(15)
+LSCDEF(16)
+LSCDEF(17)
+LSCDEF(18)
+LSCDEF(19)
+LSCDEF(20)
+
+listcoll_t
+get_list_call_func(size_t offset)
+{
+ static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
+ list_coll_2, list_coll_3, list_coll_4, list_coll_5,
+ list_coll_6, list_coll_7, list_coll_8, list_coll_9,
+ list_coll_10, list_coll_11, list_coll_12, list_coll_13,
+ list_coll_14, list_coll_15, list_coll_16, list_coll_17,
+ list_coll_18, list_coll_19, list_coll_20 };
+
+ if (offset <= 20)
+ return lsarray[offset];
+
+ return list_coll_0;
+}
+
+/*
+ * Compare two sort list items, only by their original string.
+ */
+int
+list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
+{
+
+ return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
+}
+
+/*
+ * Maximum size of a number in the string (before or after decimal point)
+ */
+#define MAX_NUM_SIZE (128)
+
+/*
+ * Set suffix value
+ */
+static void
+setsuffix(wchar_t c, unsigned char *si)
+{
+ switch (c){
+ case L'k':
+ case L'K':
+ *si = 1;
+ break;
+ case L'M':
+ *si = 2;
+ break;
+ case L'G':
+ *si = 3;
+ break;
+ case L'T':
+ *si = 4;
+ break;
+ case L'P':
+ *si = 5;
+ break;
+ case L'E':
+ *si = 6;
+ break;
+ case L'Z':
+ *si = 7;
+ break;
+ case L'Y':
+ *si = 8;
+ break;
+ default:
+ *si = 0;
+ };
+}
+
+/*
+ * Read string s and parse the string into a fixed-decimal-point number.
+ * sign equals -1 if the number is negative (explicit plus is not allowed,
+ * according to GNU sort's "info sort".
+ * The number part before decimal point is in the smain, after the decimal
+ * point is in sfrac, tail is the pointer to the remainder of the string.
+ */
+static int
+read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
+{
+ bwstring_iterator s;
+
+ s = bws_begin(s0);
+
+ /* always end the fraction with zero, even if we have no fraction */
+ sfrac[0] = 0;
+
+ while (iswblank(bws_get_iter_value(s)))
+ s = bws_iterator_inc(s, 1);
+
+ if (bws_get_iter_value(s) == (wchar_t)symbol_negative_sign) {
+ *sign = -1;
+ s = bws_iterator_inc(s, 1);
+ }
+
+ // This is '0', not '\0', do not change this
+ while (iswdigit(bws_get_iter_value(s)) &&
+ (bws_get_iter_value(s) == L'0'))
+ s = bws_iterator_inc(s, 1);
+
+ while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
+ if (iswdigit(bws_get_iter_value(s))) {
+ smain[*main_len] = bws_get_iter_value(s);
+ s = bws_iterator_inc(s, 1);
+ *main_len += 1;
+ } else if (symbol_thousands_sep &&
+ (bws_get_iter_value(s) == (wchar_t)symbol_thousands_sep))
+ s = bws_iterator_inc(s, 1);
+ else
+ break;
+ }
+
+ smain[*main_len] = 0;
+
+ if (bws_get_iter_value(s) == (wchar_t)symbol_decimal_point) {
+ s = bws_iterator_inc(s, 1);
+ while (iswdigit(bws_get_iter_value(s)) &&
+ *frac_len < MAX_NUM_SIZE) {
+ sfrac[*frac_len] = bws_get_iter_value(s);
+ s = bws_iterator_inc(s, 1);
+ *frac_len += 1;
+ }
+ sfrac[*frac_len] = 0;
+
+ while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
+ --(*frac_len);
+ sfrac[*frac_len] = L'\0';
+ }
+ }
+
+ setsuffix(bws_get_iter_value(s), si);
+
+ if ((*main_len + *frac_len) == 0)
+ *sign = 0;
+
+ return 0;
+}
+
+/*
+ * Implements string sort.
+ */
+static int
+wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
+{
+
+ if (debug_sort) {
+ if (offset)
+ printf("; offset=%d\n", (int) offset);
+ bwsprintf(stdout, kv1->k, "; k1=<", ">");
+ printf("(%zu)", BWSLEN(kv1->k));
+ bwsprintf(stdout, kv2->k, ", k2=<", ">");
+ printf("(%zu)", BWSLEN(kv2->k));
+ }
+
+ return bwscoll(kv1->k, kv2->k, offset);
+}
+
+/*
+ * Compare two suffixes
+ */
+static inline int
+cmpsuffix(unsigned char si1, unsigned char si2)
+{
+
+ return (char)si1 - (char)si2;
+}
+
+/*
+ * Implements numeric sort for -n and -h.
+ */
+static int
+numcoll_impl(struct key_value *kv1, struct key_value *kv2,
+ size_t offset __unused, bool use_suffix)
+{
+ struct bwstring *s1, *s2;
+ wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
+ wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
+ int cmp_res, sign1, sign2;
+ size_t frac1, frac2, main1, main2;
+ unsigned char SI1, SI2;
+ bool e1, e2, key1_read, key2_read;
+
+ s1 = kv1->k;
+ s2 = kv2->k;
+ sign1 = sign2 = 0;
+ main1 = main2 = 0;
+ frac1 = frac2 = 0;
+
+ key1_read = key2_read = false;
+
+ if (debug_sort) {
+ bwsprintf(stdout, s1, "; k1=<", ">");
+ bwsprintf(stdout, s2, ", k2=<", ">");
+ }
+
+ if (s1 == s2)
+ return 0;
+
+ if (kv1->hint->status == HS_UNINITIALIZED) {
+ /* read the number from the string */
+ read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
+ key1_read = true;
+ kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
+ if (main1 < 1 && frac1 < 1)
+ kv1->hint->v.nh.empty=true;
+ kv1->hint->v.nh.si = SI1;
+ kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
+ HS_INITIALIZED : HS_ERROR;
+ kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
+ }
+
+ if (kv2->hint->status == HS_UNINITIALIZED) {
+ /* read the number from the string */
+ read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
+ key2_read = true;
+ kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
+ if (main2 < 1 && frac2 < 1)
+ kv2->hint->v.nh.empty=true;
+ kv2->hint->v.nh.si = SI2;
+ kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
+ HS_INITIALIZED : HS_ERROR;
+ kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
+ }
+
+ if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
+ HS_INITIALIZED) {
+ unsigned long long n1, n2;
+ bool neg1, neg2;
+
+ e1 = kv1->hint->v.nh.empty;
+ e2 = kv2->hint->v.nh.empty;
+
+ if (e1 && e2)
+ return 0;
+
+ neg1 = kv1->hint->v.nh.neg;
+ neg2 = kv2->hint->v.nh.neg;
+
+ if (neg1 && !neg2)
+ return -1;
+ if (neg2 && !neg1)
+ return 1;
+
+ if (e1)
+ return neg2 ? 1 : -1;
+ else if (e2)
+ return neg1 ? -1 : 1;
+
+
+ if (use_suffix) {
+ cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
+ if (cmp_res)
+ return neg1 ? -cmp_res : cmp_res;
+ }
+
+ n1 = kv1->hint->v.nh.n1;
+ n2 = kv2->hint->v.nh.n1;
+ if (n1 < n2)
+ return neg1 ? 1 : -1;
+ else if (n1 > n2)
+ return neg1 ? -1 : 1;
+ }
+
+ /* read the numbers from the strings */
+ if (!key1_read)
+ read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
+ if (!key2_read)
+ read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
+
+ e1 = ((main1 + frac1) == 0);
+ e2 = ((main2 + frac2) == 0);
+
+ if (e1 && e2)
+ return 0;
+
+ /* we know the result if the signs are different */
+ if (sign1 < 0 && sign2 >= 0)
+ return -1;
+ if (sign1 >= 0 && sign2 < 0)
+ return 1;
+
+ if (e1)
+ return (sign2 < 0) ? +1 : -1;
+ else if (e2)
+ return (sign1 < 0) ? -1 : 1;
+
+ if (use_suffix) {
+ cmp_res = cmpsuffix(SI1, SI2);
+ if (cmp_res)
+ return (sign1 < 0) ? -cmp_res : cmp_res;
+ }
+
+ /* if both numbers are empty assume that the strings are equal */
+ if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
+ return 0;
+
+ /*
+ * if the main part is of different size, we know the result
+ * (because the leading zeros are removed)
+ */
+ if (main1 < main2)
+ cmp_res = -1;
+ else if (main1 > main2)
+ cmp_res = +1;
+ /* if the sizes are equal then simple non-collate string compare gives the correct result */
+ else
+ cmp_res = wcscmp(smain1, smain2);
+
+ /* check fraction */
+ if (!cmp_res)
+ cmp_res = wcscmp(sfrac1, sfrac2);
+
+ if (!cmp_res)
+ return 0;
+
+ /* reverse result if the signs are negative */
+ if (sign1 < 0 && sign2 < 0)
+ cmp_res = -cmp_res;
+
+ return cmp_res;
+}
+
+/*
+ * Implements numeric sort (-n).
+ */
+static int
+numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
+{
+
+ return numcoll_impl(kv1, kv2, offset, false);
+}
+
+/*
+ * Implements 'human' numeric sort (-h).
+ */
+static int
+hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
+{
+
+ return numcoll_impl(kv1, kv2, offset, true);
+}
+
+/*
+ * Implements random sort (-R).
+ */
+static int
+randomcoll(struct key_value *kv1, struct key_value *kv2,
+ size_t offset __unused)
+{
+ struct bwstring *s1, *s2;
+ MD5_CTX ctx1, ctx2;
+ char *b1, *b2;
+
+ s1 = kv1->k;
+ s2 = kv2->k;
+
+ if (debug_sort) {
+ bwsprintf(stdout, s1, "; k1=<", ">");
+ bwsprintf(stdout, s2, ", k2=<", ">");
+ }
+
+ if (s1 == s2)
+ return 0;
+
+ memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
+ memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
+
+ MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
+ MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
+ b1 = MD5End(&ctx1, NULL);
+ b2 = MD5End(&ctx2, NULL);
+ if (b1 == NULL) {
+ if (b2 == NULL)
+ return 0;
+ else {
+ sort_free(b2);
+ return -1;
+ }
+ } else if (b2 == NULL) {
+ sort_free(b1);
+ return 1;
+ } else {
+ int cmp_res;
+
+ cmp_res = strcmp(b1, b2);
+ sort_free(b1);
+ sort_free(b2);
+
+ if (!cmp_res)
+ cmp_res = bwscoll(s1, s2, 0);
+
+ return cmp_res;
+ }
+}
+
+/*
+ * Implements version sort (-V).
+ */
+static int
+versioncoll(struct key_value *kv1, struct key_value *kv2,
+ size_t offset __unused)
+{
+ struct bwstring *s1, *s2;
+
+ s1 = kv1->k;
+ s2 = kv2->k;
+
+ if (debug_sort) {
+ bwsprintf(stdout, s1, "; k1=<", ">");
+ bwsprintf(stdout, s2, ", k2=<", ">");
+ }
+
+ if (s1 == s2)
+ return 0;
+
+ return vcmp(s1, s2);
+}
+
+/*
+ * Check for minus infinity
+ */
+static inline bool
+huge_minus(double d, int err1)
+{
+
+ if (err1 == ERANGE)
+ if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Check for plus infinity
+ */
+static inline bool
+huge_plus(double d, int err1)
+{
+
+ if (err1 == ERANGE)
+ if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Check whether a function is a NAN
+ */
+static bool
+is_nan(double d)
+{
+
+ return (d == NAN || isnan(d));
+}
+
+/*
+ * Compare two NANs
+ */
+static int
+cmp_nans(double d1, double d2)
+{
+
+ if (d1 == d2)
+ return 0;
+ return d1 < d2 ? -1 : 1;
+}
+
+/*
+ * Implements general numeric sort (-g).
+ */
+static int
+gnumcoll(struct key_value *kv1, struct key_value *kv2,
+ size_t offset __unused)
+{
+ double d1, d2;
+ int err1, err2;
+ bool empty1, empty2, key1_read, key2_read;
+
+ d1 = d2 = 0;
+ err1 = err2 = 0;
+ key1_read = key2_read = false;
+
+ if (debug_sort) {
+ bwsprintf(stdout, kv1->k, "; k1=<", ">");
+ bwsprintf(stdout, kv2->k, "; k2=<", ">");
+ }
+
+ if (kv1->hint->status == HS_UNINITIALIZED) {
+ errno = 0;
+ d1 = bwstod(kv1->k, &empty1);
+ err1 = errno;
+
+ if (empty1)
+ kv1->hint->v.gh.notnum = true;
+ else if (err1 == 0) {
+ kv1->hint->v.gh.d = d1;
+ kv1->hint->v.gh.nan = is_nan(d1);
+ kv1->hint->status = HS_INITIALIZED;
+ } else
+ kv1->hint->status = HS_ERROR;
+
+ key1_read = true;
+ }
+
+ if (kv2->hint->status == HS_UNINITIALIZED) {
+ errno = 0;
+ d2 = bwstod(kv2->k, &empty2);
+ err2 = errno;
+
+ if (empty2)
+ kv2->hint->v.gh.notnum = true;
+ else if (err2 == 0) {
+ kv2->hint->v.gh.d = d2;
+ kv2->hint->v.gh.nan = is_nan(d2);
+ kv2->hint->status = HS_INITIALIZED;
+ } else
+ kv2->hint->status = HS_ERROR;
+
+ key2_read = true;
+ }
+
+ if (kv1->hint->status == HS_INITIALIZED &&
+ kv2->hint->status == HS_INITIALIZED) {
+ if (kv1->hint->v.gh.notnum)
+ return kv2->hint->v.gh.notnum ? 0 : -1;
+ else if (kv2->hint->v.gh.notnum)
+ return 1;
+
+ if (kv1->hint->v.gh.nan)
+ return kv2->hint->v.gh.nan ?
+ cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
+ else if (kv2->hint->v.gh.nan)
+ return 1;
+
+ d1 = kv1->hint->v.gh.d;
+ d2 = kv2->hint->v.gh.d;
+
+ if (d1 < d2)
+ return -1;
+ else if (d1 > d2)
+ return 1;
+ else
+ return 0;
+ }
+
+ if (!key1_read) {
+ errno = 0;
+ d1 = bwstod(kv1->k, &empty1);
+ err1 = errno;
+ }
+
+ if (!key2_read) {
+ errno = 0;
+ d2 = bwstod(kv2->k, &empty2);
+ err2 = errno;
+ }
+
+ /* Non-value case: */
+ if (empty1)
+ return empty2 ? 0 : -1;
+ else if (empty2)
+ return 1;
+
+ /* NAN case */
+ if (is_nan(d1))
+ return is_nan(d2) ? cmp_nans(d1, d2) : -1;
+ else if (is_nan(d2))
+ return 1;
+
+ /* Infinities */
+ if (err1 == ERANGE || err2 == ERANGE) {
+ /* Minus infinity case */
+ if (huge_minus(d1, err1)) {
+ if (huge_minus(d2, err2)) {
+ if (d1 == d2)
+ return 0;
+ return d1 < d2 ? -1 : 1;
+ } else
+ return -1;
+
+ } else if (huge_minus(d2, err2)) {
+ if (huge_minus(d1, err1)) {
+ if (d1 == d2)
+ return 0;
+ return d1 < d2 ? -1 : 1;
+ } else
+ return 1;
+ }
+
+ /* Plus infinity case */
+ if (huge_plus(d1, err1)) {
+ if (huge_plus(d2, err2)) {
+ if (d1 == d2)
+ return 0;
+ return d1 < d2 ? -1 : 1;
+ } else
+ return 1;
+ } else if (huge_plus(d2, err2)) {
+ if (huge_plus(d1, err1)) {
+ if (d1 == d2)
+ return 0;
+ return d1 < d2 ? -1 : 1;
+ } else
+ return -1;
+ }
+ }
+
+ if (d1 == d2)
+ return 0;
+ return d1 < d2 ? -1 : 1;
+}
+
+/*
+ * Implements month sort (-M).
+ */
+static int
+monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
+{
+ int val1, val2;
+ bool key1_read, key2_read;
+
+ val1 = val2 = 0;
+ key1_read = key2_read = false;
+
+ if (debug_sort) {
+ bwsprintf(stdout, kv1->k, "; k1=<", ">");
+ bwsprintf(stdout, kv2->k, "; k2=<", ">");
+ }
+
+ if (kv1->hint->status == HS_UNINITIALIZED) {
+ kv1->hint->v.Mh.m = bws_month_score(kv1->k);
+ key1_read = true;
+ kv1->hint->status = HS_INITIALIZED;
+ }
+
+ if (kv2->hint->status == HS_UNINITIALIZED) {
+ kv2->hint->v.Mh.m = bws_month_score(kv2->k);
+ key2_read = true;
+ kv2->hint->status = HS_INITIALIZED;
+ }
+
+ if (kv1->hint->status == HS_INITIALIZED) {
+ val1 = kv1->hint->v.Mh.m;
+ key1_read = true;
+ }
+
+ if (kv2->hint->status == HS_INITIALIZED) {
+ val2 = kv2->hint->v.Mh.m;
+ key2_read = true;
+ }
+
+ if (!key1_read)
+ val1 = bws_month_score(kv1->k);
+ if (!key2_read)
+ val2 = bws_month_score(kv2->k);
+
+ if (val1 == val2)
+ return 0;
+ return val1 < val2 ? -1 : 1;
+}