[SystemZ][z/OS] ASCII/EBCDIC support with no coexistence

The aim of this patch is to break up the larger patch (https://reviews.llvm.org/D111323) to be more upstream friendly. In particular, this patch adds the char encoding sensitive changes but does not use inline namespaces as before. The use of namespaces to build both versions of the library, and localization of error messages will follow in a subsequent patch.

Differential Revision: https://reviews.llvm.org/D114813

NOKEYCHECK=True
GitOrigin-RevId: a1da73961d291c6a205150caa6ebda71757b9add
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 82a643e..f78fb77 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -467,6 +467,9 @@
 # These flags get added to CMAKE_CXX_FLAGS and CMAKE_C_FLAGS so that
 # 'config-ix' use them during feature checks. It also adds them to both
 # 'LIBCXX_COMPILE_FLAGS' and 'LIBCXX_LINK_FLAGS'
+if(ZOS)
+  add_target_flags_if_supported("-fzos-le-char-mode=ebcdic")
+endif()
 if(LIBCXX_TARGET_TRIPLE)
   add_target_flags_if_supported("--target=${LIBCXX_TARGET_TRIPLE}")
 endif()
diff --git a/include/__config b/include/__config
index 654816b..85f334f 100644
--- a/include/__config
+++ b/include/__config
@@ -257,6 +257,10 @@
 #  endif // defined(__GLIBC_PREREQ)
 #endif // defined(__linux__)
 
+#if defined(__MVS__)
+#  include <features.h> // for __NATIVE_ASCII_F
+#endif
+
 #ifdef __LITTLE_ENDIAN__
 #  if __LITTLE_ENDIAN__
 #    define _LIBCPP_LITTLE_ENDIAN
@@ -1220,8 +1224,8 @@
 #endif
 
 #if defined(__BIONIC__) || defined(__NuttX__) ||      \
-    defined(__Fuchsia__) || defined(__wasi__) || defined(_LIBCPP_HAS_MUSL_LIBC) || \
-    defined(__MVS__) || defined(__OpenBSD__)
+    defined(__Fuchsia__) || defined(__wasi__) ||		  \
+    defined(_LIBCPP_HAS_MUSL_LIBC) || defined(__OpenBSD__)
 #define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE
 #endif
 
diff --git a/include/__locale b/include/__locale
index 3bddbc8..6181f25 100644
--- a/include/__locale
+++ b/include/__locale
@@ -511,6 +511,33 @@
 # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT
 # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA
 # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT
+#elif defined(__MVS__)
+# if defined(__NATIVE_ASCII_F)
+    typedef unsigned int mask;   // ASCII mode: classification bits come from the _IS*_A macros
+    static const mask space  = _ISSPACE_A;
+    static const mask print  = _ISPRINT_A;
+    static const mask cntrl  = _ISCNTRL_A;
+    static const mask upper  = _ISUPPER_A;
+    static const mask lower  = _ISLOWER_A;
+    static const mask alpha  = _ISALPHA_A;
+    static const mask digit  = _ISDIGIT_A;
+    static const mask punct  = _ISPUNCT_A;
+    static const mask xdigit = _ISXDIGIT_A;
+    static const mask blank  = _ISBLANK_A;
+# else
+    typedef unsigned short mask; // EBCDIC mode: classification bits come from the __IS* macros
+    static const mask space  = __ISSPACE;
+    static const mask print  = __ISPRINT;
+    static const mask cntrl  = __ISCNTRL;
+    static const mask upper  = __ISUPPER;
+    static const mask lower  = __ISLOWER;
+    static const mask alpha  = __ISALPHA;
+    static const mask digit  = __ISDIGIT;
+    static const mask punct  = __ISPUNCT;
+    static const mask xdigit = __ISXDIGIT;
+    static const mask blank  = __ISBLANK;
+# endif
+    static const mask __regex_word = 0x8000; // shared by both modes; must follow the `mask` typedef
 #else
 # error unknown rune table for this platform -- do you mean to define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE?
 #endif
@@ -734,6 +761,10 @@
     static const short* __classic_upper_table() _NOEXCEPT;
     static const short* __classic_lower_table() _NOEXCEPT;
 #endif
+#if defined(__MVS__)
+    static const unsigned short* __classic_upper_table() _NOEXCEPT;
+    static const unsigned short* __classic_lower_table() _NOEXCEPT;
+#endif
 
 protected:
     ~ctype();
diff --git a/include/regex b/include/regex
index 8203c81..59b2c9a 100644
--- a/include/regex
+++ b/include/regex
@@ -1310,19 +1310,51 @@
     return (__c == '_' && (__m & __regex_word));
 }
 
+inline _LIBCPP_INLINE_VISIBILITY
+bool __is_07(unsigned char __c) // true iff '0' <= __c && __c <= '7' in the execution character set
+{
+    return (__c & 0xF8u) ==
+#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
+        0xF0; // EBCDIC: '0'..'7' are 0xF0..0xF7
+#else
+        0x30; // ASCII: '0'..'7' are 0x30..0x37
+#endif
+}
+
+inline _LIBCPP_INLINE_VISIBILITY
+bool __is_89(unsigned char __c) // true iff '8' <= __c && __c <= '9' in the execution character set
+{
+    return (__c & 0xFEu) ==
+#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
+        0xF8; // EBCDIC: '8' and '9' are 0xF8 and 0xF9
+#else
+        0x38; // ASCII: '8' and '9' are 0x38 and 0x39
+#endif
+}
+
+inline _LIBCPP_INLINE_VISIBILITY
+unsigned char __to_lower(unsigned char __c) // bit-level case fold; applied to any byte, so callers must range-check the result
+{
+#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
+    return __c & 0xBF; // EBCDIC: clear bit 0x40, e.g. 'A' (0xC1) -> 'a' (0x81)
+#else
+    return __c | 0x20; // ASCII: set bit 0x20, e.g. 'A' (0x41) -> 'a' (0x61)
+#endif
+}
+
 template <class _CharT>
 int
 regex_traits<_CharT>::__regex_traits_value(unsigned char __ch, int __radix)
 {
-    if ((__ch & 0xF8u) == 0x30)  // '0' <= __ch && __ch <= '7'
+    if (__is_07(__ch))  // '0' <= __ch && __ch <= '7'
         return __ch - '0';
     if (__radix != 8)
     {
-        if ((__ch & 0xFEu) == 0x38)  // '8' <= __ch && __ch <= '9'
+        if (__is_89(__ch))  // '8' <= __ch && __ch <= '9'
             return __ch - '0';
         if (__radix == 16)
         {
-            __ch |= 0x20;  // tolower
+            __ch = __to_lower(__ch);  // tolower
             if ('a' <= __ch && __ch <= 'f')
                 return __ch - ('a' - 10);
         }
diff --git a/src/locale.cpp b/src/locale.cpp
index 79f03b8..2234784 100644
--- a/src/locale.cpp
+++ b/src/locale.cpp
@@ -898,7 +898,7 @@
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
     return isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
     return isascii(c) ? ctype<char>::__classic_upper_table()[c] : c;
 #else
     return (isascii(c) && iswlower_l(c, _LIBCPP_GET_C_LOCALE)) ? c-L'a'+L'A' : c;
@@ -912,7 +912,7 @@
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
         *low = isascii(*low) ? _DefaultRuneLocale.__mapupper[*low] : *low;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
         *low = isascii(*low) ? ctype<char>::__classic_upper_table()[*low]
                              : *low;
 #else
@@ -927,7 +927,7 @@
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
     return isascii(c) ? _DefaultRuneLocale.__maplower[c] : c;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
     return isascii(c) ? ctype<char>::__classic_lower_table()[c] : c;
 #else
     return (isascii(c) && isupper_l(c, _LIBCPP_GET_C_LOCALE)) ? c-L'A'+'a' : c;
@@ -941,7 +941,7 @@
 #ifdef _LIBCPP_HAS_DEFAULTRUNELOCALE
         *low = isascii(*low) ? _DefaultRuneLocale.__maplower[*low] : *low;
 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || \
-      defined(__NetBSD__)
+      defined(__NetBSD__) || defined(__MVS__)
         *low = isascii(*low) ? ctype<char>::__classic_lower_table()[*low]
                              : *low;
 #else
@@ -1013,7 +1013,7 @@
       static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(c)]) : c;
 #elif defined(__NetBSD__)
     return static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
     return isascii(c) ?
       static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c;
 #else
@@ -1030,7 +1030,7 @@
           static_cast<char>(_DefaultRuneLocale.__mapupper[static_cast<ptrdiff_t>(*low)]) : *low;
 #elif defined(__NetBSD__)
         *low = static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(*low)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
         *low = isascii(*low) ?
           static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low;
 #else
@@ -1047,7 +1047,7 @@
       static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(c)]) : c;
 #elif defined(__NetBSD__)
     return static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(c)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
     return isascii(c) ?
       static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c;
 #else
@@ -1063,7 +1063,7 @@
         *low = isascii(*low) ? static_cast<char>(_DefaultRuneLocale.__maplower[static_cast<ptrdiff_t>(*low)]) : *low;
 #elif defined(__NetBSD__)
         *low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]);
-#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__)
+#elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__MVS__)
         *low = isascii(*low) ? static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low;
 #else
         *low = (isascii(*low) && isupper_l(*low, _LIBCPP_GET_C_LOCALE)) ? *low-'A'+'a' : *low;
@@ -1211,6 +1211,12 @@
     return _ctype_ + 1;
 #elif defined(_AIX)
     return (const unsigned int *)__lc_ctype_ptr->obj->mask;
+#elif defined(__MVS__)
+# if defined(__NATIVE_ASCII_F)
+    return const_cast<const ctype<char>::mask*> (__OBJ_DATA(__lc_ctype_a)->mask);
+# else
+    return const_cast<const ctype<char>::mask*> (__ctypec);
+# endif
 #else
     // Platform not supported: abort so the person doing the port knows what to
     // fix
@@ -1259,7 +1265,26 @@
 {
     return *__ctype_toupper_loc();
 }
-#endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__
+#elif defined(__MVS__)
+const unsigned short*
+ctype<char>::__classic_lower_table() _NOEXCEPT
+{ // z/OS: returns the to-lower mapping table; callers index it by character value
+# if defined(__NATIVE_ASCII_F)
+  return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->lower); // ASCII mode: `lower` member of the __lc_ctype_a object
+# else
+  return const_cast<const unsigned short*>(__ctype + __TOLOWER_INDEX); // EBCDIC mode: __ctype advanced by __TOLOWER_INDEX
+# endif
+}
+const unsigned short *
+ctype<char>::__classic_upper_table() _NOEXCEPT
+{ // z/OS: returns the to-upper mapping table; callers index it by character value
+# if defined(__NATIVE_ASCII_F)
+  return const_cast<const unsigned short*>(__OBJ_DATA(__lc_ctype_a)->upper); // ASCII mode: `upper` member of the __lc_ctype_a object
+# else
+  return const_cast<const unsigned short*>(__ctype + __TOUPPER_INDEX); // EBCDIC mode: __ctype advanced by __TOUPPER_INDEX
+# endif
+}
+#endif // __GLIBC__ || __NETBSD__ || __EMSCRIPTEN__ || __MVS__
 
 // template <> class ctype_byname<char>
 
diff --git a/src/regex.cpp b/src/regex.cpp
index 425339a..16ad8f0 100644
--- a/src/regex.cpp
+++ b/src/regex.cpp
@@ -76,6 +76,125 @@
     char char_;
 };
 
+#if defined(__MVS__) && !defined(__NATIVE_ASCII_F)
+// EBCDIC IBM-1047
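+// Sorted by name in EBCDIC collating order: lowercase letters sort before uppercase ones, so entries are ordered differently from the ASCII table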
+const collationnames collatenames[] =
+{
+    {"a", 0x81},
+    {"alert", 0x2f},
+    {"ampersand", 0x50},
+    {"apostrophe", 0x7d},
+    {"asterisk", 0x5c},
+    {"b", 0x82},
+    {"backslash", 0xe0},
+    {"backspace", 0x16},
+    {"c", 0x83},
+    {"carriage-return", 0xd},
+    {"circumflex", 0x5f},
+    {"circumflex-accent", 0x5f},
+    {"colon", 0x7a},
+    {"comma", 0x6b},
+    {"commercial-at", 0x7c},
+    {"d", 0x84},
+    {"dollar-sign", 0x5b},
+    {"e", 0x85},
+    {"eight", 0xf8},
+    {"equals-sign", 0x7e},
+    {"exclamation-mark", 0x5a},
+    {"f", 0x86},
+    {"five", 0xf5},
+    {"form-feed", 0xc},
+    {"four", 0xf4},
+    {"full-stop", 0x4b},
+    {"g", 0x87},
+    {"grave-accent", 0x79},
+    {"greater-than-sign", 0x6e},
+    {"h", 0x88},
+    {"hyphen", 0x60},
+    {"hyphen-minus", 0x60},
+    {"i", 0x89},
+    {"j", 0x91},
+    {"k", 0x92},
+    {"l", 0x93},
+    {"left-brace", 0xc0},
+    {"left-curly-bracket", 0xc0},
+    {"left-parenthesis", 0x4d},
+    {"left-square-bracket", 0xad},
+    {"less-than-sign", 0x4c},
+    {"low-line", 0x6d},
+    {"m", 0x94},
+    {"n", 0x95},
+    {"newline", 0x15},
+    {"nine", 0xf9},
+    {"number-sign", 0x7b},
+    {"o", 0x96},
+    {"one", 0xf1},
+    {"p", 0x97},
+    {"percent-sign", 0x6c},
+    {"period", 0x4b},
+    {"plus-sign", 0x4e},
+    {"q", 0x98},
+    {"question-mark", 0x6f},
+    {"quotation-mark", 0x7f},
+    {"r", 0x99},
+    {"reverse-solidus", 0xe0},
+    {"right-brace", 0xd0},
+    {"right-curly-bracket", 0xd0},
+    {"right-parenthesis", 0x5d},
+    {"right-square-bracket", 0xbd},
+    {"s", 0xa2},
+    {"semicolon", 0x5e},
+    {"seven", 0xf7},
+    {"six", 0xf6},
+    {"slash", 0x61},
+    {"solidus", 0x61},
+    {"space", 0x40},
+    {"t", 0xa3},
+    {"tab", 0x5},
+    {"three", 0xf3},
+    {"tilde", 0xa1},
+    {"two", 0xf2},
+    {"u", 0xa4},
+    {"underscore", 0x6d},
+    {"v", 0xa5},
+    {"vertical-line", 0x4f},
+    {"vertical-tab", 0xb},
+    {"w", 0xa6},
+    {"x", 0xa7},
+    {"y", 0xa8},
+    {"z", 0xa9},
+    {"zero", 0xf0},
+    {"A", 0xc1},
+    {"B", 0xc2},
+    {"C", 0xc3},
+    {"D", 0xc4},
+    {"E", 0xc5},
+    {"F", 0xc6},
+    {"G", 0xc7},
+    {"H", 0xc8},
+    {"I", 0xc9},
+    {"J", 0xd1},
+    {"K", 0xd2},
+    {"L", 0xd3},
+    {"M", 0xd4},
+    {"N", 0xd5},
+    {"NUL", 0},
+    {"O", 0xd6},
+    {"P", 0xd7},
+    {"Q", 0xd8},
+    {"R", 0xd9},
+    {"S", 0xe2},
+    {"T", 0xe3},
+    {"U", 0xe4},
+    {"V", 0xe5},
+    {"W", 0xe6},
+    {"X", 0xe7},
+    {"Y", 0xe8},
+    {"Z", 0xe9}
+};
+#else
+// ASCII
 const collationnames collatenames[] =
 {
     {"A", 0x41},
@@ -190,6 +309,7 @@
     {"z", 0x7a},
     {"zero", 0x30}
 };
+#endif
 
 struct classnames
 {