Despite my pathological distrust of spin locks, the numbers just don't lie.  I've put a small spin in __sp_mut::lock() on std::mutex::try_lock(), which is testing quite well.  In my experience, putting in a yield for every failed iteration is also a major performance booster.  This change makes one of the performance tests I was using (a highly contended one) run about 20 times faster.

llvm-svn: 160967
Cr-Mirrored-From: sso://chromium.googlesource.com/_direct/external/github.com/llvm/llvm-project
Cr-Mirrored-Commit: 088e37c77aafaec5ead8fbe7ebf918265e6b86f2
diff --git a/src/memory.cpp b/src/memory.cpp
index 7caab26..1c108b8 100644
--- a/src/memory.cpp
+++ b/src/memory.cpp
@@ -10,6 +10,7 @@
 #define _LIBCPP_BUILDING_MEMORY
 #include "memory"
 #include "mutex"
+#include "thread"
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
@@ -129,13 +130,23 @@
 void
 __sp_mut::lock() _NOEXCEPT
 {
-    reinterpret_cast<mutex*>(_)->lock();
+    mutex& m = *static_cast<mutex*>(_);
+    unsigned count = 0;
+    while (!m.try_lock())
+    {
+        if (++count > 16)
+        {
+            m.lock();
+            break;
+        }
+        this_thread::yield();
+    }
 }
 
 void
 __sp_mut::unlock() _NOEXCEPT
 {
-    reinterpret_cast<mutex*>(_)->unlock();
+    static_cast<mutex*>(_)->unlock();
 }
 
 __sp_mut&