[libc++] Add missing short wchar handling for codecvt_utf8, codecvt_utf16 and codecvt_utf8_utf16
Summary:
AIX have 2 byte wchar in 32 bit mode and 4 byte wchar in 64 bit mode.
This patch add more missing short wchar handling under the existing _LIBCPP_SHORT_WCHAR macro.
Marked test case ctor_move.pass.cpp as XFAIL for 32-bit mode on AIX because UTF-8 constants used cannot be converted to 2-byte wchar (by xingxue).
Authored by: jasonliu
Reviewed by: ldionne, zibi, SeanP, libc++
Differential Revision: https://reviews.llvm.org/D100777
NOKEYCHECK=True
GitOrigin-RevId: f53fafbacbc22bacd38fd1bdd52359156e6bfcba
diff --git a/src/locale.cpp b/src/locale.cpp
index c1b9726..8aa67f7 100644
--- a/src/locale.cpp
+++ b/src/locale.cpp
@@ -3485,15 +3485,25 @@
{
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+#if defined(_LIBCPP_SHORT_WCHAR)
+ return utf8_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#else
return utf8_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#endif
}
int
__codecvt_utf8<wchar_t>::do_max_length() const noexcept
{
+#if defined(_LIBCPP_SHORT_WCHAR)
+ if (_Mode_ & consume_header)
+ return 6;
+ return 3;
+#else
if (_Mode_ & consume_header)
return 7;
return 4;
+#endif
}
// __codecvt_utf8<char16_t>
@@ -3653,14 +3663,25 @@
const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
{
+#if defined(_LIBCPP_SHORT_WCHAR)
+ const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+ const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+ const uint16_t* _frm_nxt = _frm;
+#else
const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
const uint32_t* _frm_nxt = _frm;
+#endif
uint8_t* _to = reinterpret_cast<uint8_t*>(to);
uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
uint8_t* _to_nxt = _to;
+#if defined(_LIBCPP_SHORT_WCHAR)
+ result r = ucs2_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+#else
result r = ucs4_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
+#endif
frm_nxt = frm + (_frm_nxt - _frm);
to_nxt = to + (_to_nxt - _to);
return r;
@@ -3674,11 +3695,19 @@
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
const uint8_t* _frm_nxt = _frm;
+#if defined(_LIBCPP_SHORT_WCHAR)
+ uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+ uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+ uint16_t* _to_nxt = _to;
+ result r = utf16be_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+#else
uint32_t* _to = reinterpret_cast<uint32_t*>(to);
uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
uint32_t* _to_nxt = _to;
result r = utf16be_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
+#endif
frm_nxt = frm + (_frm_nxt - _frm);
to_nxt = to + (_to_nxt - _to);
return r;
@@ -3710,15 +3739,25 @@
{
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+#if defined(_LIBCPP_SHORT_WCHAR)
+ return utf16be_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#else
return utf16be_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#endif
}
int
__codecvt_utf16<wchar_t, false>::do_max_length() const noexcept
{
+#if defined(_LIBCPP_SHORT_WCHAR)
+ if (_Mode_ & consume_header)
+ return 4;
+ return 2;
+#else
if (_Mode_ & consume_header)
return 6;
return 4;
+#endif
}
// __codecvt_utf16<wchar_t, true>
@@ -3728,14 +3767,25 @@
const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
{
+#if defined(_LIBCPP_SHORT_WCHAR)
+ const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+ const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+ const uint16_t* _frm_nxt = _frm;
+#else
const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
const uint32_t* _frm_nxt = _frm;
+#endif
uint8_t* _to = reinterpret_cast<uint8_t*>(to);
uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
uint8_t* _to_nxt = _to;
+#if defined(_LIBCPP_SHORT_WCHAR)
+ result r = ucs2_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+#else
result r = ucs4_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
+#endif
frm_nxt = frm + (_frm_nxt - _frm);
to_nxt = to + (_to_nxt - _to);
return r;
@@ -3749,11 +3799,19 @@
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
const uint8_t* _frm_nxt = _frm;
+#if defined(_LIBCPP_SHORT_WCHAR)
+ uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+ uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+ uint16_t* _to_nxt = _to;
+ result r = utf16le_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+#else
uint32_t* _to = reinterpret_cast<uint32_t*>(to);
uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
uint32_t* _to_nxt = _to;
result r = utf16le_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
+#endif
frm_nxt = frm + (_frm_nxt - _frm);
to_nxt = to + (_to_nxt - _to);
return r;
@@ -3785,15 +3843,25 @@
{
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+#if defined(_LIBCPP_SHORT_WCHAR)
+ return utf16le_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#else
return utf16le_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+#endif
}
int
__codecvt_utf16<wchar_t, true>::do_max_length() const noexcept
{
+#if defined(_LIBCPP_SHORT_WCHAR)
+ if (_Mode_ & consume_header)
+ return 4;
+ return 2;
+#else
if (_Mode_ & consume_header)
return 6;
return 4;
+#endif
}
// __codecvt_utf16<char16_t, false>
@@ -4103,9 +4171,15 @@
const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
{
+#if defined(_LIBCPP_SHORT_WCHAR)
+ const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+ const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+ const uint16_t* _frm_nxt = _frm;
+#else
const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
const uint32_t* _frm_nxt = _frm;
+#endif
uint8_t* _to = reinterpret_cast<uint8_t*>(to);
uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
uint8_t* _to_nxt = _to;
@@ -4124,9 +4198,15 @@
const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
const uint8_t* _frm_nxt = _frm;
+#if defined(_LIBCPP_SHORT_WCHAR)
+ uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+ uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+ uint16_t* _to_nxt = _to;
+#else
uint32_t* _to = reinterpret_cast<uint32_t*>(to);
uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
uint32_t* _to_nxt = _to;
+#endif
result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
_Maxcode_, _Mode_);
frm_nxt = frm + (_frm_nxt - _frm);