[SystemZ][z/OS] ASCII/EBCDIC support with no coexistence The aim of this patch is to break up the larger patch (https://reviews.llvm.org/D111323) to be more upstream friendly. In particular, this patch adds the char encoding sensitive changes but does not use inline namespaces as before. The use of namespaces to build both versions of the library, and localization of error messages will follow in a subsequent patch. Differential Revision: https://reviews.llvm.org/D114813 NOKEYCHECK=True GitOrigin-RevId: a1da73961d291c6a205150caa6ebda71757b9add

commit: 38aa7879a02c0bcb541c89a0ba50d1e2f07d4b04 [log] [tgz]
author: Muiez Ahmed <muiez@ibm.com> Fri Jan 14 11:35:53 2022 -0500
committer: Copybara-Service <copybara-worker@google.com> Fri Jan 14 08:41:52 2022 -0800
tree: 2491859c139a2c4c6aa2f43f8aa5411676d3e737
parent: bef95c72a47baf745fb81b33f978e5a36725e786 [diff]
diff --git a/src/locale.cpp b/src/locale.cpp
index 79f03b8..2234784 100644
--- a/src/locale.cpp
+++ b/src/locale.cpp

@@ -898,7 +898,7 @@
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
     return isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
     return isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
 #else
     return (isascii(c) && iswlower_l(c, _LIBCPP_GET_C_LOCALE)) ? c-L'a'+L'A' : c;
@@ -912,7 +912,7 @@
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
         *low = isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
         *low = isascii(*low) ? ctype<char>::__classic_upper_table()[*low]
                              : *low;
 #else
@@ -927,7 +927,7 @@
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
     return isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
     return isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
 #else
     return (isascii(c) && isupper_l(c, _LIBCPP_GET_C_LOCALE)) ? c-L'A'+'a' : c;
@@ -941,7 +941,7 @@
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
         *low = isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
         *low = isascii(*low) ? ctype<char>::__classic_lower_table()[*low]
                              : *low;
 #else
@@ -1013,7 +1013,7 @@
       static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c;
 #elif defined(__NetBSD__)
     return static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
     return isascii(c) ?
       static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
 #else
@@ -1030,7 +1030,7 @@
           static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)]) : *low;
 #elif defined(__NetBSD__)
         *low = static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(*low)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
         *low = isascii(*low) ?
           static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
 #else
@@ -1047,7 +1047,7 @@
       static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c;
 #elif defined(__NetBSD__)
     return static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(c)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
     return isascii(c) ?
       static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
 #else
@@ -1063,7 +1063,7 @@
         *low = isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)]) : *low;
 #elif defined(__NetBSD__)
         *low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
         *low = isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
 #else
         *low = (isascii(*low) && isupper_l(*low, _LIBCPP_GET_C_LOCALE)) ? *low-'A'+'a' : *low;
@@ -1211,6 +1211,12 @@
     return _ctype_ + 1;
 #elif defined(_AIX)
     return (const unsigned int *)__lc_ctype_ptr->obj->mask;
+#elif defined(__MVS__)
+# if defined(__NATIVE_ASCII_F)
+    return const_cast<const ctype<char>::mask*> (__OBJ_DATA(__lc_ctype_a)->mask);
+# else
+    return const_cast<const ctype<char>::mask*> (__ctypec);
+# endif
 #else
     // Platform not supported: abort so the person doing the port knows what to
     // fix
@@ -1259,7 +1265,26 @@
 {
     return *__ctype_toupper_loc();
 }
-#endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__
+#elif defined(__MVS__)
+const unsigned short*
+ctype<char>::__classic_lower_table() _NOEXCEPT
+{
+# if defined(__NATIVE_ASCII_F)
+  return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->lower);
+# else
+  return const_cast<const unsigned short*>(__ctype + __TOLOWER_INDEX);
+# endif
+}
+const unsigned short *
+ctype<char>::__classic_upper_table() _NOEXCEPT
+{
+# if defined(__NATIVE_ASCII_F)
+  return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->upper);
+# else
+  return const_cast<const unsigned short*>(__ctype + __TOUPPER_INDEX);
+# endif
+}
+#endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__ || __MVS__
 
 // template <> class ctype_byname<char>
 

diff --git a/src/regex.cpp b/src/regex.cpp
index 425339a..16ad8f0 100644
--- a/src/regex.cpp
+++ b/src/regex.cpp

@@ -76,6 +76,125 @@
     char char_;
 };
 
+#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
+// EBCDIC IBM-1047
+// Sorted via the EBCDIC collating sequence
+const collationnames collatenames[] =
+{
+    {"a", 0x81},
+    {"alert", 0x2f},
+    {"ampersand", 0x50},
+    {"apostrophe", 0x7d},
+    {"asterisk", 0x5c},
+    {"b", 0x82},
+    {"backslash", 0xe0},
+    {"backspace", 0x16},
+    {"c", 0x83},
+    {"carriage-return", 0xd},
+    {"circumflex", 0x5f},
+    {"circumflex-accent", 0x5f},
+    {"colon", 0x7a},
+    {"comma", 0x6b},
+    {"commercial-at", 0x7c},
+    {"d", 0x84},
+    {"dollar-sign", 0x5b},
+    {"e", 0x85},
+    {"eight", 0xf8},
+    {"equals-sign", 0x7e},
+    {"exclamation-mark", 0x5a},
+    {"f", 0x86},
+    {"five", 0xf5},
+    {"form-feed", 0xc},
+    {"four", 0xf4},
+    {"full-stop", 0x4b},
+    {"g", 0x87},
+    {"grave-accent", 0x79},
+    {"greater-than-sign", 0x6e},
+    {"h", 0x88},
+    {"hyphen", 0x60},
+    {"hyphen-minus", 0x60},
+    {"i", 0x89},
+    {"j", 0x91},
+    {"k", 0x92},
+    {"l", 0x93},
+    {"left-brace", 0xc0},
+    {"left-curly-bracket", 0xc0},
+    {"left-parenthesis", 0x4d},
+    {"left-square-bracket", 0xad},
+    {"less-than-sign", 0x4c},
+    {"low-line", 0x6d},
+    {"m", 0x94},
+    {"n", 0x95},
+    {"newline", 0x15},
+    {"nine", 0xf9},
+    {"number-sign", 0x7b},
+    {"o", 0x96},
+    {"one", 0xf1},
+    {"p", 0x97},
+    {"percent-sign", 0x6c},
+    {"period", 0x4b},
+    {"plus-sign", 0x4e},
+    {"q", 0x98},
+    {"question-mark", 0x6f},
+    {"quotation-mark", 0x7f},
+    {"r", 0x99},
+    {"reverse-solidus", 0xe0},
+    {"right-brace", 0xd0},
+    {"right-curly-bracket", 0xd0},
+    {"right-parenthesis", 0x5d},
+    {"right-square-bracket", 0xbd},
+    {"s", 0xa2},
+    {"semicolon", 0x5e},
+    {"seven", 0xf7},
+    {"six", 0xf6},
+    {"slash", 0x61},
+    {"solidus", 0x61},
+    {"space", 0x40},
+    {"t", 0xa3},
+    {"tab", 0x5},
+    {"three", 0xf3},
+    {"tilde", 0xa1},
+    {"two", 0xf2},
+    {"u", 0xa4},
+    {"underscore", 0x6d},
+    {"v", 0xa5},
+    {"vertical-line", 0x4f},
+    {"vertical-tab", 0xb},
+    {"w", 0xa6},
+    {"x", 0xa7},
+    {"y", 0xa8},
+    {"z", 0xa9},
+    {"zero", 0xf0},
+    {"A", 0xc1},
+    {"B", 0xc2},
+    {"C", 0xc3},
+    {"D", 0xc4},
+    {"E", 0xc5},
+    {"F", 0xc6},
+    {"G", 0xc7},
+    {"H", 0xc8},
+    {"I", 0xc9},
+    {"J", 0xd1},
+    {"K", 0xd2},
+    {"L", 0xd3},
+    {"M", 0xd4},
+    {"N", 0xd5},
+    {"NUL", 0},
+    {"O", 0xd6},
+    {"P", 0xd7},
+    {"Q", 0xd8},
+    {"R", 0xd9},
+    {"S", 0xe2},
+    {"T", 0xe3},
+    {"U", 0xe4},
+    {"V", 0xe5},
+    {"W", 0xe6},
+    {"X", 0xe7},
+    {"Y", 0xe8},
+    {"Z", 0xe9}
+};
+#else
+// ASCII
 const collationnames collatenames[] =
 {
     {"A", 0x41},
@@ -190,6 +309,7 @@
     {"z", 0x7a},
     {"zero", 0x30}
 };
+#endif
 
 struct classnames
 {
commit	38aa7879a02c0bcb541c89a0ba50d1e2f07d4b04	[log] [tgz]
author	Muiez Ahmed <muiez@ibm.com>	Fri Jan 14 11:35:53 2022 -0500
committer	Copybara-Service <copybara-worker@google.com>	Fri Jan 14 08:41:52 2022 -0800
tree	2491859c139a2c4c6aa2f43f8aa5411676d3e737
parent	bef95c72a47baf745fb81b33f978e5a36725e786 [diff]