[libc++] Add missing short wchar handling for codecvt_utf8, codecvt_utf16 and codecvt_utf8_utf16

Summary:
AIX have 2 byte wchar in 32 bit mode and 4 byte wchar in 64 bit mode.
This patch add more missing short wchar handling under the existing _LIBCPP_SHORT_WCHAR macro.

Marked test case ctor_move.pass.cpp as XFAIL for 32-bit mode on AIX because UTF-8 constants used cannot be converted to 2-byte wchar (by xingxue).

Authored by: jasonliu

Reviewed by: ldionne, zibi, SeanP, libc++

Differential Revision: https://reviews.llvm.org/D100777

NOKEYCHECK=True
GitOrigin-RevId: f53fafbacbc22bacd38fd1bdd52359156e6bfcba
diff --git a/src/locale.cpp b/src/locale.cpp
index c1b9726..8aa67f7 100644
--- a/src/locale.cpp
+++ b/src/locale.cpp
@@ -3485,15 +3485,25 @@
 {
     const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
     const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+#if defined(_LIBCPP_SHORT_WCHAR)
+    return utf8_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#else
     return utf8_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#endif
 }
 
 int
 __codecvt_utf8<wchar_t>::do_max_length() const noexcept
 {
+#if defined(_LIBCPP_SHORT_WCHAR)
+    if (_Mode_ & consume_header)
+        return 6;
+    return 3;
+#else
     if (_Mode_ & consume_header)
         return 7;
     return 4;
+#endif
 }
 
 // __codecvt_utf8<char16_t>
@@ -3653,14 +3663,25 @@
     const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
     extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
 {
+#if defined(_LIBCPP_SHORT_WCHAR)
+    const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+    const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+    const uint16_t* _frm_nxt = _frm;
+#else
     const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
     const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
     const uint32_t* _frm_nxt = _frm;
+#endif
     uint8_t* _to = reinterpret_cast<uint8_t*>(to);
     uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
     uint8_t* _to_nxt = _to;
+#if defined(_LIBCPP_SHORT_WCHAR)
+    result r = ucs2_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+                               _Maxcode_, _Mode_);
+#else
     result r = ucs4_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
                                _Maxcode_, _Mode_);
+#endif
     frm_nxt = frm + (_frm_nxt - _frm);
     to_nxt = to + (_to_nxt - _to);
     return r;
@@ -3674,11 +3695,19 @@
     const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
     const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
     const uint8_t* _frm_nxt = _frm;
+#if defined(_LIBCPP_SHORT_WCHAR)
+    uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+    uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+    uint16_t* _to_nxt = _to;
+    result r = utf16be_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+                               _Maxcode_, _Mode_);
+#else
     uint32_t* _to = reinterpret_cast<uint32_t*>(to);
     uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
     uint32_t* _to_nxt = _to;
     result r = utf16be_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
                                _Maxcode_, _Mode_);
+#endif
     frm_nxt = frm + (_frm_nxt - _frm);
     to_nxt = to + (_to_nxt - _to);
     return r;
@@ -3710,15 +3739,25 @@
 {
     const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
     const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+#if defined(_LIBCPP_SHORT_WCHAR)
+    return utf16be_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#else
     return utf16be_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#endif
 }
 
 int
 __codecvt_utf16<wchar_t, false>::do_max_length() const noexcept
 {
+#if defined(_LIBCPP_SHORT_WCHAR)
+    if (_Mode_ & consume_header)
+        return 4;
+    return 2;
+#else
     if (_Mode_ & consume_header)
         return 6;
     return 4;
+#endif
 }
 
 // __codecvt_utf16<wchar_t, true>
@@ -3728,14 +3767,25 @@
     const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
     extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
 {
+#if defined(_LIBCPP_SHORT_WCHAR)
+    const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+    const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+    const uint16_t* _frm_nxt = _frm;
+#else
     const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
     const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
     const uint32_t* _frm_nxt = _frm;
+#endif
     uint8_t* _to = reinterpret_cast<uint8_t*>(to);
     uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
     uint8_t* _to_nxt = _to;
+#if defined(_LIBCPP_SHORT_WCHAR)
+    result r = ucs2_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+                               _Maxcode_, _Mode_);
+#else
     result r = ucs4_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
                                _Maxcode_, _Mode_);
+#endif
     frm_nxt = frm + (_frm_nxt - _frm);
     to_nxt = to + (_to_nxt - _to);
     return r;
@@ -3749,11 +3799,19 @@
     const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
     const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
     const uint8_t* _frm_nxt = _frm;
+#if defined(_LIBCPP_SHORT_WCHAR)
+    uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+    uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+    uint16_t* _to_nxt = _to;
+    result r = utf16le_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+                               _Maxcode_, _Mode_);
+#else
     uint32_t* _to = reinterpret_cast<uint32_t*>(to);
     uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
     uint32_t* _to_nxt = _to;
     result r = utf16le_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
                                _Maxcode_, _Mode_);
+#endif
     frm_nxt = frm + (_frm_nxt - _frm);
     to_nxt = to + (_to_nxt - _to);
     return r;
@@ -3785,15 +3843,25 @@
 {
     const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
     const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+#if defined(_LIBCPP_SHORT_WCHAR)
+    return utf16le_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#else
     return utf16le_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#endif
 }
 
 int
 __codecvt_utf16<wchar_t, true>::do_max_length() const noexcept
 {
+#if defined(_LIBCPP_SHORT_WCHAR)
+    if (_Mode_ & consume_header)
+        return 4;
+    return 2;
+#else
     if (_Mode_ & consume_header)
         return 6;
     return 4;
+#endif
 }
 
 // __codecvt_utf16<char16_t, false>
@@ -4103,9 +4171,15 @@
     const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
     extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
 {
+#if defined(_LIBCPP_SHORT_WCHAR)
+    const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+    const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+    const uint16_t* _frm_nxt = _frm;
+#else
     const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
     const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
     const uint32_t* _frm_nxt = _frm;
+#endif
     uint8_t* _to = reinterpret_cast<uint8_t*>(to);
     uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
     uint8_t* _to_nxt = _to;
@@ -4124,9 +4198,15 @@
     const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
     const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
     const uint8_t* _frm_nxt = _frm;
+#if defined(_LIBCPP_SHORT_WCHAR)
+    uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+    uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+    uint16_t* _to_nxt = _to;
+#else
     uint32_t* _to = reinterpret_cast<uint32_t*>(to);
     uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
     uint32_t* _to_nxt = _to;
+#endif
     result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
                              _Maxcode_, _Mode_);
     frm_nxt = frm + (_frm_nxt - _frm);