Loading...
--- /dev/null
+++ Libc/Libc-1725.0.11/tests/collate.c
@@ -0,0 +1,384 @@
+#include <sys/param.h>
+
+#include <TargetConditionals.h>
+#include <errno.h>
+#include <limits.h>
+#include <locale.h>
+#include <regex.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <xlocale.h>
+
+#include <darwintest.h>
+
+/*
+ * Libc collation lookup routines exercised by the tests in this file; they are
+ * declared here by hand (presumably because none of the headers included
+ * above export them -- TODO confirm).
+ *
+ * The tests in this file treat the arguments as follows: `str` is the
+ * NUL-terminated string to inspect, `*len` reports how many characters the
+ * lookup consumed, and `*prim` / `*sec` receive the primary and secondary
+ * collation weights.  __collate_lookup_l() works on wide characters and takes
+ * the locale to consult explicitly; __collate_lookup() works on narrow
+ * (unsigned char) strings and takes no locale argument.
+ */
+void __collate_lookup_l(const __darwin_wchar_t *str, int *len, int *prim,
+    int *sec, locale_t loc);
+void __collate_lookup(const unsigned char *str, int *len, int *prim, int *sec);
+
+/*
+ * in C or POSIX locales
+ * __collate_lookup("", ... ) -> len: 0 prim: 0 sec: 0
+ * __collate_lookup("a", ... ) -> len: 1 prim: (int)'a' sec: 0
+ * __collate_lookup("ab", ... ) -> len: 1 prim: (int)'a' sec: 0
+ *
+ * in a Latin-1 locale (de_DE.ISO8859-1)
+ * __collate_lookup("", ... ) -> len: 0 prim: 0 sec: 0
+ * __collate_lookup("a", ... ) -> len: 1 prim: > 0 sec: > 0
+ * __collate_lookup("ab", ... ) -> len: 1 prim: > 0 sec: > 0
+ * # a character not in the table - lookup failure
+ * __collate_lookup("\xdf", ... ) -> len: 0 prim: -1 sec: -1
+ *
+ * in a UTF-8 locale (de_DE.UTF-8)
+ * __collate_lookup("", ... ) -> len: 0 prim: 0 sec: 0
+ * __collate_lookup("a", ... ) -> len: 1 prim: > 0 sec: > 0
+ * __collate_lookup("ab", ... ) -> len: 1 prim: > 0 sec: > 0
+ * # An invalid multi-byte sequence
+ * __collate_lookup("\xe4", ... ) -> len: 1 prim: (int)'\xe4' sec: 0
+ * # valid multi-byte sequence
+ * __collate_lookup("\xc3\xa4", ... ) -> len: 2 prim: > 0 sec: > 0
+ */
+/*
+ * Drives the narrow-string __collate_lookup() through the C locale and, on
+ * macOS only (TARGET_OS_OSX), the de_DE.ISO8859-1 and de_DE.UTF-8 locales.
+ *
+ * Every loop walks the byte values 1 .. UCHAR_MAX - 1: the counter is an
+ * unsigned char, so a "<= UCHAR_MAX" bound could never terminate, and 0xff is
+ * therefore not exercised.
+ */
+T_DECL(collate_lookup, "Test __collate_lookup() behavior") {
+	unsigned char c;
+	unsigned char str[16];
+	int len, prim, sec, prim2, sec2;
+	char *result;
+
+	/* ------------------------- C Locale ------------------------- */
+	/*
+	 * In the C locale primary weights should equal the int value of the
+	 * character and there is no secondary weight.
+	 */
+	result = setlocale(LC_ALL, "C");
+	T_ASSERT_NOTNULL(result, "changed to C locale");
+
+	/* A string literal is a char array; the prototype takes unsigned char. */
+	__collate_lookup((const unsigned char *)"", &len, &prim, &sec);
+	T_ASSERT_EQ_INT(len, 0, "No characters read");
+	T_EXPECT_EQ_INT(prim, 0, "No primary weight");
+	T_EXPECT_EQ_INT(sec, 0, "No secondary weight");
+
+	/* Each subject is the byte under test followed by a trailing 'X'. */
+	str[1] = 'X';
+	str[2] = '\0';
+	for (c = 1; c < UCHAR_MAX; c++) {
+		len = 1;
+		str[0] = c;
+		__collate_lookup(str, &len, &prim, &sec);
+		T_ASSERT_EQ_INT(len, 1, "Only read one character");
+		T_EXPECT_EQ_INT(prim, (int)c, "Primary weight returned is the value of c");
+		T_EXPECT_EQ_INT(sec, 0, "Secondary weight returned is 0");
+	}
+
+#if TARGET_OS_OSX
+	/* ------------------------- German Latin-1 Locale ----------------------- */
+	result = setlocale(LC_ALL, "de_DE.ISO8859-1");
+	T_ASSERT_NOTNULL(result, "changed to german Latin-1 locale");
+
+	__collate_lookup((const unsigned char *)"", &len, &prim, &sec);
+	T_ASSERT_EQ_INT(len, 0, "No characters read");
+	T_EXPECT_EQ_INT(prim, 0, "No primary weight");
+	T_EXPECT_EQ_INT(sec, 0, "No secondary weight");
+
+	str[1] = 'X';
+	str[2] = '\0';
+	for (c = 1; c < UCHAR_MAX; c++) {
+		len = 1;
+		str[0] = c;
+		__collate_lookup(str, &len, &prim, &sec);
+		/* c starts at 1, so exactly one character is always expected. */
+		T_ASSERT_EQ_INT(len, 1, "Only read one character");
+		/*
+		 * Drop the trailing 'X': from the second iteration on, and for
+		 * the 'a' / 0xe4 lookups after the loop, str holds a single
+		 * character.
+		 */
+		str[1] = '\0';
+		if (prim == 0 || prim == -1) {
+			/* No weight (0) or lookup failure (-1): sec must agree. */
+			T_EXPECT_EQ(sec, prim, "0x%x has no secondary weight", c);
+		} else {
+			T_EXPECT_GT(prim, 0, "0x%x Has primary weight", c);
+			T_EXPECT_GT(sec, 0, "0x%x Has secondary weight", c);
+		}
+	}
+
+	/* Reference weights for plain 'a' land in prim/sec. */
+	str[0] = 'a';
+	__collate_lookup(str, &len, &prim, &sec);
+	T_ASSERT_EQ_INT(len, 1, "Only read one character");
+
+	/* a with dieresis in Latin-1 locales */
+	str[0] = (unsigned char)'\xe4';
+	__collate_lookup(str, &len, &prim2, &sec2);
+	T_ASSERT_EQ_INT(len, 1, "Only read one character");
+	T_EXPECT_EQ(prim, prim2, "Same primary weight");
+	T_EXPECT_LT(sec, sec2, "Different secondary weight");
+
+	/* ------------------------- German UTF-8 Locale ------------------------- */
+	result = setlocale(LC_ALL, "de_DE.UTF-8");
+	T_ASSERT_NOTNULL(result, "changed to german UTF-8 locale");
+
+	__collate_lookup((const unsigned char *)"", &len, &prim, &sec);
+	T_ASSERT_EQ_INT(len, 0, "No characters read");
+	T_EXPECT_EQ_INT(prim, 0, "No primary weight");
+	T_EXPECT_EQ_INT(sec, 0, "No secondary weight");
+
+	/* Here the trailing 'X' stays in place for the whole loop and beyond. */
+	str[1] = 'X';
+	str[2] = '\0';
+	for (c = 1; c < UCHAR_MAX; c++) {
+		len = 2; /* Tell it that this string is longer */
+		str[0] = c;
+		__collate_lookup(str, &len, &prim, &sec);
+		T_ASSERT_EQ_INT(len, 1, "Only read one character");
+		if (prim == 0 || prim == -1) {
+			T_EXPECT_EQ(sec, prim, "0x%x has no secondary weight", c);
+		} else {
+			T_EXPECT_GT(prim, 0, "0x%x Has primary weight", c);
+			/* weight will be 0 for sequences that result in mb failure */
+			if (c < 128) {
+				/* So only test secondary weights for the ASCII characters */
+				T_EXPECT_GT(sec, 0, "0x%x Has secondary weight", c);
+			}
+		}
+	}
+
+	/* Reference weights for plain 'a' land in prim/sec. */
+	str[0] = 'a';
+	__collate_lookup(str, &len, &prim, &sec);
+	T_ASSERT_EQ_INT(len, 1, "Only read one character");
+
+	/* a with dieresis in Latin-1 locales */
+	/* this character is invalid in a UTF-8 locale */
+	/* (0xe4 followed by the 'X' still in str[1] is not valid UTF-8) */
+	str[0] = (unsigned char)'\xe4';
+	errno = 0;
+	__collate_lookup(str, &len, &prim2, &sec2);
+	T_EXPECT_EQ_INT(errno, EILSEQ, "errno indicates invalid character");
+	T_ASSERT_EQ_INT(len, 1, "Only read one character");
+	T_EXPECT_EQ(prim2, (unsigned int)L'\xe4',
+	    "Invalid character - Primary weight equal to value (228)");
+	T_EXPECT_EQ(sec2, 0, "Invalid character - No secondary weight");
+
+	T_EXPECT_NE(prim, prim2, "Different primary weight");
+	T_EXPECT_NE(sec, sec2, "Different secondary weight");
+
+	/* Test Multibyte lookup */
+	/* 0xc3 0xa4 is the UTF-8 encoding of a with dieresis (U+00E4). */
+	str[0] = (unsigned char)'\xc3';
+	str[1] = (unsigned char)'\xa4';
+	str[2] = (unsigned char)'X';
+	str[3] = (unsigned char)'\0';
+	len = 3;
+	__collate_lookup(str, &len, &prim2, &sec2);
+	/* The length assertion that follows is marked as an expected failure. */
+	T_EXPECTFAIL_WITH_REASON(
+	    "__collate_lookup doesn't actually tell you how many bytes were used");
+	T_ASSERT_EQ_INT(len, 2, "Only read 2 characters");
+	/* prim/sec still hold the weights of plain 'a' from above. */
+	T_EXPECT_EQ(prim, prim2, "Same primary weight");
+	T_EXPECT_LT(sec, sec2, "Different secondary weight");
+#endif
+}
+
+/*
+ * Tests for __collate_lookup_l(), which is used to look up the weights of
+ * wide characters.
+ */
+/*
+ * Drives the wide-character __collate_lookup_l() against LC_GLOBAL_LOCALE in
+ * the C locale and, on macOS only (TARGET_OS_OSX), the de_DE.ISO8859-1 and
+ * de_DE.UTF-8 locales.
+ *
+ * Every loop covers the wide characters 1 .. UCHAR_MAX - 1, so 0xff is not
+ * exercised.
+ */
+T_DECL(collate_lookup_l, "Test __collate_lookup_l() behavior") {
+	wchar_t wc;
+	wchar_t wcs[16];
+	int len, prim, sec, prim2, sec2;
+	char *result;
+
+	/* ------------------------- C Locale ------------------------- */
+	/*
+	 * In the C locale primary weights should equal the int value of the
+	 * character and there is no secondary weight.
+	 */
+	result = setlocale(LC_ALL, "C");
+	T_ASSERT_NOTNULL(result, "changed to C locale");
+
+	__collate_lookup_l(L"", &len, &prim, &sec, LC_GLOBAL_LOCALE);
+	T_ASSERT_EQ_INT(len, 0, "No characters read");
+	T_EXPECT_EQ_INT(prim, 0, "No primary weight");
+	T_EXPECT_EQ_INT(sec, 0, "No secondary weight");
+
+	/* Each subject is the character under test followed by a trailing 'X'. */
+	wcs[1] = L'X';
+	wcs[2] = L'\0';
+	for (wc = 1; wc < UCHAR_MAX; wc++) {
+		len = 1;
+		wcs[0] = wc;
+		errno = 0;
+		__collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE);
+		T_ASSERT_EQ_INT(errno, 0, "No error occurred");
+		T_ASSERT_EQ_INT(len, 1, "Only read one character");
+		T_EXPECT_EQ_INT(prim, (int)wc,
+		    "Primary weight returned is the value of wc");
+		T_EXPECT_EQ_INT(sec, 0, "Secondary weight returned is 0");
+	}
+
+#if TARGET_OS_OSX
+	/* ------------------------- German Latin-1 Locale -------------------------
+	 */
+	result = setlocale(LC_ALL, "de_DE.ISO8859-1");
+	T_ASSERT_NOTNULL(result, "changed to german Latin-1 locale");
+
+	wcs[1] = L'X';
+	wcs[2] = L'\0';
+	for (wc = 1; wc < UCHAR_MAX; wc++) {
+		len = 1;
+		wcs[0] = wc;
+		__collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE);
+		T_ASSERT_EQ_INT(len, 1, "Only read one character");
+		if (prim == 0 || prim == -1) {
+			/* No weight (0) or lookup failure (-1): sec must agree. */
+			T_EXPECT_EQ(sec, prim, "Wide char 0x%x has no secondary weight", wc);
+		} else {
+			T_EXPECT_GT(prim, 0, "Wide char 0x%x Has primary weight", wc);
+			T_EXPECT_GT(sec, 0, "Wide char 0x%x Has secondary weight", wc);
+		}
+	}
+
+	/* Reference weights for plain 'a' land in prim/sec. */
+	wcs[0] = L'a';
+	__collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE);
+	T_ASSERT_EQ_INT(len, 1, "Only read one character");
+
+	/* a with dieresis in Latin-1 locales */
+	wcs[0] = L'\xe4';
+	__collate_lookup_l(wcs, &len, &prim2, &sec2, LC_GLOBAL_LOCALE);
+	T_ASSERT_EQ_INT(len, 1, "Only read one character");
+	T_EXPECT_EQ(prim, prim2, "Same primary weight");
+	T_EXPECT_LT(sec, sec2, "Different secondary weight");
+
+	/* ------------------------- German UTF-8 Locale ------------------------- */
+	result = setlocale(LC_ALL, "de_DE.UTF-8");
+	T_ASSERT_NOTNULL(result, "changed to german UTF-8 locale");
+
+	__collate_lookup_l(L"", &len, &prim, &sec, LC_GLOBAL_LOCALE);
+	T_ASSERT_EQ_INT(len, 0, "No characters read");
+	T_EXPECT_EQ_INT(prim, 0, "No primary weight");
+	T_EXPECT_EQ_INT(sec, 0, "No secondary weight");
+
+	/* wcs[1] / wcs[2] still hold the 'X' and terminator set above. */
+	for (wc = 1; wc < UCHAR_MAX; wc++) {
+		len = 1;
+		wcs[0] = wc;
+		__collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE);
+		T_ASSERT_EQ_INT(len, 1, "Only read one character");
+		if (prim == 0 || prim == -1) {
+			T_EXPECT_EQ(sec, prim, "0x%x has no secondary weight", wc);
+		} else {
+			T_EXPECT_GT(prim, 0, "Wide char 0x%x Has primary weight", wc);
+			T_EXPECT_GT(sec, 0, "Wide char 0x%x Has secondary weight", wc);
+		}
+	}
+
+	/* Test that a lookup of 'a' and '\xe4' returns the same primary weight */
+	/*
+	 * The diagnostics below report wcs[0], the character actually looked
+	 * up; the loop counter wc is left at UCHAR_MAX by the loop above.
+	 */
+	wcs[0] = L'a';
+	wcs[1] = L'\0';
+	__collate_lookup_l(wcs, &len, &prim, &sec, LC_GLOBAL_LOCALE);
+	T_ASSERT_EQ_INT(len, 1, "Only read one character");
+	T_EXPECT_GT(prim, 0, "Wide char 0x%x Has primary weight", wcs[0]);
+	T_EXPECT_GT(sec, 0, "Wide char 0x%x Has secondary weight", wcs[0]);
+
+	wcs[0] = L'\xe4';
+	wcs[1] = L'\0';
+	errno = 0;
+	__collate_lookup_l(wcs, &len, &prim2, &sec2, LC_GLOBAL_LOCALE);
+	T_EXPECT_EQ_INT(errno, 0, "errno was not set");
+	T_ASSERT_EQ_INT(len, 1, "Only read one character");
+	T_EXPECT_GT(prim2, 0, "Wide char 0x%x Has primary weight", wcs[0]);
+	T_EXPECT_GT(sec2, 0, "Wide char 0x%x Has secondary weight", wcs[0]);
+
+	T_EXPECT_EQ(prim, prim2, "Primary weight equal");
+	T_EXPECT_NE(sec, sec2, "Different secondary weight");
+#endif
+}
+
+/*
+ * Runs the compiled expression `preg` against a one-character subject made of
+ * `ch` and asserts that it matches exactly when `expected` is true.  `type`
+ * is an adverb ("sensitively" / "insensitively") used only in the assertion
+ * message.  For ch == 0 the subject is the empty string.
+ */
+static void
+try_one(regex_t *preg, unsigned char ch, bool expected, const char *type)
+{
+	const char *verb = expected ? "match" : "not match";
+	unsigned char subject[2];
+	regmatch_t pmatch;
+	int rc;
+
+	subject[0] = ch;
+	subject[1] = 0x00;
+
+	/* regexec() returns 0 on a match. */
+	rc = regexec(preg, (const char *)subject, 1, &pmatch, 0);
+	T_ASSERT_EQ(rc == 0, expected, "Character 0x%x should %s %s",
+	    ch, verb, type);
+}
+
+/*
+ * Checks regex equivalence-class brackets ([[=x=]]) against every single-byte
+ * subject from 0x00 through 0xfe, both case-sensitively and with REG_ICASE,
+ * first in the en_US.ISO8859-1 locale and then in the C locale.  Enabled on
+ * macOS only.
+ */
+T_DECL(collate_equivalence, "Test regex equivalence class behavior",
+    T_META_ENABLED(TARGET_OS_OSX)) {
+	regex_t ireg, reg;
+	const char *cpatterns[] = { "[[=a=]]", "[[=A=]]", NULL };
+	const char *isopatterns[] = { "[[=\xe5=]]", "[[=a=]]", NULL };
+	const char *loc;
+	int error;
+	bool lower, exp, iexp;
+
+	loc = setlocale(LC_ALL, "en_US.ISO8859-1");
+	T_ASSERT_EQ_STR(loc, "en_US.ISO8859-1", "setlocale en_US.ISO8859-1");
+
+	/* Both ISO8859-1 patterns are expected to match the same characters. */
+	for (const char **pat = isopatterns; *pat != NULL; pat++) {
+		T_LOG("Trying pattern '%s', ISO8859-1 locale", *pat);
+		error = regcomp(&reg, *pat, REG_BASIC);
+		T_ASSERT_EQ_INT(error, 0, "Regex compilation");
+
+		error = regcomp(&ireg, *pat, REG_BASIC | REG_ICASE);
+		T_ASSERT_EQ_INT(error, 0, "Case-insensitive regex compilation");
+
+		/* The unsigned char counter stops before 0xff. */
+		for (unsigned char ch = 0x00; ch < 0xff; ch++) {
+			/* 0xe0-0xe5: lowercase accented forms of 'a' in ISO8859-1. */
+			exp = (ch >= 0xe0 && ch < 0xe6) || ch == 'a';
+			/* Ignoring case also admits 0xc0-0xc5, 'A' and 0xaa. */
+			iexp = exp || (ch >= 0xc0 && ch < 0xc6) || ch == 'A' ||
+			    ch == 0xaa /* FEMININE_ORDINAL_INDICATOR */;
+
+			try_one(&reg, ch, exp, "sensitively");
+			try_one(&ireg, ch, iexp, "insensitively");
+		}
+
+		regfree(&reg);
+		regfree(&ireg);
+	}
+
+	loc = setlocale(LC_ALL, "C");
+	T_ASSERT_EQ_STR(loc, "C", "setlocale C");
+
+	/* cpatterns[0] is the lowercase pattern, cpatterns[1] the uppercase. */
+	lower = true;
+	for (const char **pat = cpatterns; *pat != NULL; pat++) {
+		T_LOG("Trying pattern '%s', C locale", *pat);
+		error = regcomp(&reg, *pat, REG_BASIC);
+		T_ASSERT_EQ_INT(error, 0, "Regex compilation");
+
+		error = regcomp(&ireg, *pat, REG_BASIC | REG_ICASE);
+		T_ASSERT_EQ_INT(error, 0, "Case-insensitive regex compilation");
+
+		for (unsigned char ch = 0x00; ch < 0xff; ch++) {
+			/*
+			 * In the C locale a letter is expected to match only
+			 * itself, plus its other case under REG_ICASE.
+			 */
+			if (lower) {
+				exp = ch == 'a';
+				iexp = exp || ch == 'A';
+			} else {
+				exp = ch == 'A';
+				iexp = exp || ch == 'a';
+			}
+
+			try_one(&reg, ch, exp, "sensitively");
+			try_one(&ireg, ch, iexp, "insensitively");
+		}
+
+		regfree(&reg);
+		regfree(&ireg);
+
+		lower = false;
+	}
+}
+
+/*
+ * Points PATH_LOCALE at <cwd>/locales, then verifies that "ss" and "\xdf"
+ * compare as different under strcoll() in the C locale but as equal once
+ * LC_COLLATE is switched to the "legacy.subst" locale.
+ */
+T_DECL(collate_subst_legacy, "Test legacy substitution behavior") {
+	char cwdbuf[MAXPATHLEN], *cwd, *localepath;
+	const char *curloc;
+	int ret;
+
+	cwd = getcwd(cwdbuf, sizeof(cwdbuf));
+	T_ASSERT_NE_PTR(cwd, NULL, NULL);
+
+	ret = asprintf(&localepath, "%s/locales", cwd);
+	T_ASSERT_GT(ret, 0, NULL);
+
+	T_LOG("Setting PATH_LOCALE to %s", localepath);
+	ret = setenv("PATH_LOCALE", localepath, 1);
+	T_ASSERT_EQ(ret, 0, NULL);
+
+	/* They should not be equivalent in the C locale. */
+	/* Confirm the switch took effect so the baseline comparison means something. */
+	curloc = setlocale(LC_COLLATE, "C");
+	T_ASSERT_EQ_STR(curloc, "C", "Set LC_COLLATE to the C locale");
+	ret = strcoll("ss", "\xdf");
+	T_ASSERT_NE(ret, 0, NULL);
+
+	curloc = setlocale(LC_COLLATE, "legacy.subst");
+	T_ASSERT_EQ_STR(curloc, "legacy.subst", "Set LC_COLLATE to a legacy definition");
+
+	ret = strcoll("ss", "\xdf");
+	T_ASSERT_EQ(ret, 0, "Compare with substitutions applied");
+	free(localepath);
+}