Partially inline basic_string copy constructor in UNSTABLE
his change splits the copy constructor up inlining short initialization, and explicitly outlining long initialization into __init_copy_ctor_external() which is the externally instantiated slow path.
For unstable ABI, this has the following changes:
remove basic_string(const basic_string&)
remove basic_string(const basic_string&, const Allocator&)
add __init_copy_ctor_external(const value_type*, size_type)
Quick local benchmark for Copy:
Master
```
---------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------
BM_StringCopy_Empty 3.50 ns 3.51 ns 199326720
BM_StringCopy_Small 3.50 ns 3.51 ns 199510016
BM_StringCopy_Large 15.7 ns 15.7 ns 45230080
BM_StringCopy_Huge 1503 ns 1503 ns 464896
```
```
---------------------------------------------------------------
Benchmark Time CPU Iterations
---------------------------------------------------------------
BM_StringCopy_Empty 1.99 ns 2.00 ns 356471808
BM_StringCopy_Small 3.29 ns 3.30 ns 203425792
BM_StringCopy_Large 13.3 ns 13.3 ns 52948992
BM_StringCopy_Huge 1472 ns 1472 ns 475136
```
Author: Martijn Vels <martijn.vels@gmail.com>
Reviewers: EricWF, mclow.list
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D73223
Cr-Mirrored-From: https://chromium.googlesource.com/external/github.com/llvm/llvm-project
Cr-Mirrored-Commit: 8cf76e913b867a98a9843aa1b3d782632ed5d930
diff --git a/include/string b/include/string
index c2a4220..ffa8c66 100644
--- a/include/string
+++ b/include/string
@@ -1549,6 +1549,16 @@
inline
void __init(size_type __n, value_type __c);
+ // Slow path for the (inlined) copy constructor for 'long' strings.
+ // Always externally instantiated and not inlined.
+ // Requires that __s is zero terminated.
+ // The main reason for this function to exist is because for unstable, we
+ // want to allow inlining of the copy constructor. However, we don't want
+ // to call the __init() functions as those are marked as inline which may
+ // result in over-aggressive inlining by the compiler, where our aim is
+ // to only inline the fast path code directly in the ctor.
+ void __init_copy_ctor_external(const value_type* __s, size_type __sz);
+
template <class _InputIterator>
inline
_EnableIf
@@ -1850,7 +1860,9 @@
if (!__str.__is_long())
__r_.first().__r = __str.__r_.first().__r;
else
- __init(_VSTD::__to_address(__str.__get_long_pointer()), __str.__get_long_size());
+ __init_copy_ctor_external(_VSTD::__to_address(__str.__get_long_pointer()),
+ __str.__get_long_size());
+
#if _LIBCPP_DEBUG_LEVEL >= 2
__get_db()->__insert_c(this);
#endif
@@ -1864,7 +1876,8 @@
if (!__str.__is_long())
__r_.first().__r = __str.__r_.first().__r;
else
- __init(_VSTD::__to_address(__str.__get_long_pointer()), __str.__get_long_size());
+ __init_copy_ctor_external(_VSTD::__to_address(__str.__get_long_pointer()),
+ __str.__get_long_size());
#if _LIBCPP_DEBUG_LEVEL >= 2
__get_db()->__insert_c(this);
#endif
@@ -1891,6 +1904,25 @@
}
template <class _CharT, class _Traits, class _Allocator>
+void basic_string<_CharT, _Traits, _Allocator>::__init_copy_ctor_external(
+ const value_type* __s, size_type __sz) {
+ pointer __p;
+ if (__sz < __min_cap) {
+ __p = __get_short_pointer();
+ __set_short_size(__sz);
+ } else {
+ if (__sz > max_size())
+ this->__throw_length_error();
+ size_t __cap = __recommend(__sz);
+ __p = __alloc_traits::allocate(__alloc(), __cap + 1);
+ __set_long_pointer(__p);
+ __set_long_cap(__cap + 1);
+ __set_long_size(__sz);
+ }
+ traits_type::copy(_VSTD::__to_address(__p), __s, __sz + 1);
+}
+
+template <class _CharT, class _Traits, class _Allocator>
inline
basic_string<_CharT, _Traits, _Allocator>::basic_string(basic_string&& __str, const allocator_type& __a)
: __r_(__default_init_tag(), __a)