[SystemZ][z/OS] ASCII/EBCDIC support with no coexistence

This patch splits the larger patch (https://reviews.llvm.org/D111323) into smaller pieces to make it more upstream-friendly. In particular, this patch adds the character-encoding-sensitive changes but, unlike the original patch, does not use inline namespaces. The use of namespaces to build both versions of the library, and the localization of error messages, will follow in a subsequent patch.

Differential Revision: https://reviews.llvm.org/D114813

NOKEYCHECK=True
GitOrigin-RevId: a1da73961d291c6a205150caa6ebda71757b9add
diff --git a/src/locale.cpp b/src/locale.cpp
index 79f03b8..2234784 100644
--- a/src/locale.cpp
+++ b/src/locale.cpp
@@ -898,7 +898,7 @@
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
     return isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
     return isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
 #else
     return (isascii(c) && iswlower_l(c, _LIBCPP_GET_C_LOCALE)) ? c-L'a'+L'A' : c;
@@ -912,7 +912,7 @@
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
         *low = isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
         *low = isascii(*low) ? ctype<char>::__classic_upper_table()[*low]
                              : *low;
 #else
@@ -927,7 +927,7 @@
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
     return isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
     return isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
 #else
     return (isascii(c) && isupper_l(c, _LIBCPP_GET_C_LOCALE)) ? c-L'A'+'a' : c;
@@ -941,7 +941,7 @@
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
         *low = isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
         *low = isascii(*low) ? ctype<char>::__classic_lower_table()[*low]
                              : *low;
 #else
@@ -1013,7 +1013,7 @@
       static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c;
 #elif defined(__NetBSD__)
     return static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
     return isascii(c) ?
       static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
 #else
@@ -1030,7 +1030,7 @@
           static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)]) : *low;
 #elif defined(__NetBSD__)
         *low = static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(*low)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
         *low = isascii(*low) ?
           static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
 #else
@@ -1047,7 +1047,7 @@
       static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c;
 #elif defined(__NetBSD__)
     return static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(c)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
     return isascii(c) ?
       static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
 #else
@@ -1063,7 +1063,7 @@
         *low = isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)]) : *low;
 #elif defined(__NetBSD__)
         *low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
         *low = isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
 #else
         *low = (isascii(*low) && isupper_l(*low, _LIBCPP_GET_C_LOCALE)) ? *low-'A'+'a' : *low;
@@ -1211,6 +1211,12 @@
     return _ctype_ + 1;
 #elif defined(_AIX)
     return (const unsigned int *)__lc_ctype_ptr->obj->mask;
+#elif defined(__MVS__)
+# if defined(__NATIVE_ASCII_F)
+    return const_cast<const ctype<char>::mask*> (__OBJ_DATA(__lc_ctype_a)->mask);
+# else
+    return const_cast<const ctype<char>::mask*> (__ctypec);
+# endif
 #else
     // Platform not supported: abort so the person doing the port knows what to
     // fix
@@ -1259,7 +1265,26 @@
 {
     return *__ctype_toupper_loc();
 }
-#endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__
+#elif defined(__MVS__)
+const unsigned short*
+ctype<char>::__classic_lower_table() _NOEXCEPT // z/OS (__MVS__) definition of the classic to-lower table accessor
+{
+# if defined(__NATIVE_ASCII_F) // ASCII mode on z/OS
+  return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->lower); // NOTE(review): presumably the lowercase map of the ASCII ctype object -- confirm against the z/OS headers
+# else // EBCDIC mode
+  return const_cast<const unsigned short*>(__ctype + __TOLOWER_INDEX); // NOTE(review): presumably the to-lower section of the system __ctype table -- confirm
+# endif // __NATIVE_ASCII_F
+}
+const unsigned short *
+ctype<char>::__classic_upper_table() _NOEXCEPT // z/OS (__MVS__) definition of the classic to-upper table accessor
+{
+# if defined(__NATIVE_ASCII_F) // ASCII mode on z/OS
+  return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->upper); // NOTE(review): presumably the uppercase map of the ASCII ctype object -- confirm against the z/OS headers
+# else // EBCDIC mode
+  return const_cast<const unsigned short*>(__ctype + __TOUPPER_INDEX); // NOTE(review): presumably the to-upper section of the system __ctype table -- confirm
+# endif // __NATIVE_ASCII_F
+}
+#endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__ || __MVS__
 
 // template <> class ctype_byname<char>
 
diff --git a/src/regex.cpp b/src/regex.cpp
index 425339a..16ad8f0 100644
--- a/src/regex.cpp
+++ b/src/regex.cpp
@@ -76,6 +76,125 @@
     char char_;
 };
 
+#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
+// EBCDIC IBM-1047 code points for the named collating elements (z/OS, non-ASCII mode).
+// Entries are sorted by name via the EBCDIC collating sequence, in which lowercase letters order before uppercase ones.
+const collationnames collatenames[] =
+{
+    {"a", 0x81},
+    {"alert", 0x2f},
+    {"ampersand", 0x50},
+    {"apostrophe", 0x7d},
+    {"asterisk", 0x5c},
+    {"b", 0x82},
+    {"backslash", 0xe0},
+    {"backspace", 0x16},
+    {"c", 0x83},
+    {"carriage-return", 0xd},
+    {"circumflex", 0x5f},
+    {"circumflex-accent", 0x5f},
+    {"colon", 0x7a},
+    {"comma", 0x6b},
+    {"commercial-at", 0x7c},
+    {"d", 0x84},
+    {"dollar-sign", 0x5b},
+    {"e", 0x85},
+    {"eight", 0xf8},
+    {"equals-sign", 0x7e},
+    {"exclamation-mark", 0x5a},
+    {"f", 0x86},
+    {"five", 0xf5},
+    {"form-feed", 0xc},
+    {"four", 0xf4},
+    {"full-stop", 0x4b},
+    {"g", 0x87},
+    {"grave-accent", 0x79},
+    {"greater-than-sign", 0x6e},
+    {"h", 0x88},
+    {"hyphen", 0x60},
+    {"hyphen-minus", 0x60},
+    {"i", 0x89},
+    {"j", 0x91},
+    {"k", 0x92},
+    {"l", 0x93},
+    {"left-brace", 0xc0},
+    {"left-curly-bracket", 0xc0},
+    {"left-parenthesis", 0x4d},
+    {"left-square-bracket", 0xad},
+    {"less-than-sign", 0x4c},
+    {"low-line", 0x6d},
+    {"m", 0x94},
+    {"n", 0x95},
+    {"newline", 0x15},
+    {"nine", 0xf9},
+    {"number-sign", 0x7b},
+    {"o", 0x96},
+    {"one", 0xf1},
+    {"p", 0x97},
+    {"percent-sign", 0x6c},
+    {"period", 0x4b},
+    {"plus-sign", 0x4e},
+    {"q", 0x98},
+    {"question-mark", 0x6f},
+    {"quotation-mark", 0x7f},
+    {"r", 0x99},
+    {"reverse-solidus", 0xe0},
+    {"right-brace", 0xd0},
+    {"right-curly-bracket", 0xd0},
+    {"right-parenthesis", 0x5d},
+    {"right-square-bracket", 0xbd},
+    {"s", 0xa2},
+    {"semicolon", 0x5e},
+    {"seven", 0xf7},
+    {"six", 0xf6},
+    {"slash", 0x61},
+    {"solidus", 0x61},
+    {"space", 0x40},
+    {"t", 0xa3},
+    {"tab", 0x5},
+    {"three", 0xf3},
+    {"tilde", 0xa1},
+    {"two", 0xf2},
+    {"u", 0xa4},
+    {"underscore", 0x6d},
+    {"v", 0xa5},
+    {"vertical-line", 0x4f},
+    {"vertical-tab", 0xb},
+    {"w", 0xa6},
+    {"x", 0xa7},
+    {"y", 0xa8},
+    {"z", 0xa9},
+    {"zero", 0xf0},
+    {"A", 0xc1}, // uppercase names start here: their code points (0xc1 and up) order after every lowercase letter
+    {"B", 0xc2},
+    {"C", 0xc3},
+    {"D", 0xc4},
+    {"E", 0xc5},
+    {"F", 0xc6},
+    {"G", 0xc7},
+    {"H", 0xc8},
+    {"I", 0xc9},
+    {"J", 0xd1},
+    {"K", 0xd2},
+    {"L", 0xd3},
+    {"M", 0xd4},
+    {"N", 0xd5},
+    {"NUL", 0}, // sorts between "N" and "O" because "N" is a prefix of "NUL"
+    {"O", 0xd6},
+    {"P", 0xd7},
+    {"Q", 0xd8},
+    {"R", 0xd9},
+    {"S", 0xe2},
+    {"T", 0xe3},
+    {"U", 0xe4},
+    {"V", 0xe5},
+    {"W", 0xe6},
+    {"X", 0xe7},
+    {"Y", 0xe8},
+    {"Z", 0xe9}
+};
+#else
+// ASCII
 const collationnames collatenames[] =
 {
     {"A", 0x41},
@@ -190,6 +309,7 @@
     {"z", 0x7a},
     {"zero", 0x30}
 };
+#endif
 
 struct classnames
 {