// -*- C++ -*-
//===--------------------------- barrier ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCPP_BARRIER
#define _LIBCPP_BARRIER

/*
    barrier synopsis

namespace std
{

  template<class CompletionFunction = see below>
  class barrier
  {
  public:
    using arrival_token = see below;

    constexpr explicit barrier(ptrdiff_t phase_count,
                               CompletionFunction f = CompletionFunction());
    ~barrier();

    barrier(const barrier&) = delete;
    barrier& operator=(const barrier&) = delete;

    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
    void wait(arrival_token&& arrival) const;

    void arrive_and_wait();
    void arrive_and_drop();

  private:
    CompletionFunction completion; // exposition only
  };

}

*/
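
/*
Illustrative usage (not part of this header; the names, worker count, and loop
bounds below are arbitrary): a fixed group of threads repeatedly synchronizes
on a std::barrier whose completion function runs once per phase, after the
last arrival and before any waiter is released.

    #include <barrier>
    #include <cstdio>
    #include <thread>
    #include <vector>

    int main() {
        constexpr int num_workers = 4;
        std::barrier sync(num_workers, []() noexcept {
            std::puts("phase complete");       // runs once per phase
        });

        std::vector<std::thread> workers;
        for (int i = 0; i < num_workers; ++i)
            workers.emplace_back([&] {
                for (int phase = 0; phase < 3; ++phase) {
                    // ... per-phase work ...
                    sync.arrive_and_wait();    // block until all workers arrive
                }
            });
        for (auto& t : workers)
            t.join();
    }
*/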

#include <__config>
#include <atomic>
#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
# include <memory>
#endif

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#pragma GCC system_header
#endif

#ifdef _LIBCPP_HAS_NO_THREADS
# error <barrier> is not supported on this single-threaded system
#endif

#if _LIBCPP_STD_VER < 14
# error <barrier> requires C++14 or later
#endif

_LIBCPP_BEGIN_NAMESPACE_STD

struct __empty_completion
{
    inline _LIBCPP_INLINE_VISIBILITY
    void operator()() noexcept
    {
    }
};

#ifndef _LIBCPP_HAS_NO_TREE_BARRIER

/*

The default implementation of __barrier_base is a classic tree barrier.

It looks different from the pseudocode in the literature for two main reasons:
 1. Threads that call into std::barrier functions do not provide indices,
    so a numbering step is added before the actual barrier algorithm,
    appearing as an N+1 round to the N rounds of the tree barrier.
 2. A great deal of attention has been paid to avoiding cache-line thrashing
    by flattening the tree structure into cache-line-sized arrays that
    are indexed in an efficient way.

*/
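
/*
For reference, a minimal sketch of a textbook sense-reversing tournament (tree)
barrier for a fixed set of N threads that supply their own indices. This is not
the algorithm used here; __barrier_algorithm_base (defined in the library
sources) numbers threads on the fly and flattens the tree into cache-line-sized
arrays, as described above. The sketch only illustrates the classic structure,
and the class and member names are invented for the illustration.

    #include <atomic>
    #include <cstddef>
    #include <vector>

    class tree_barrier_sketch {
        struct alignas(64) slot {                 // one flag per thread, padded
            std::atomic<bool> arrived{false};
        };
        std::vector<slot> slots_;
        std::atomic<bool> sense_{false};          // flips once per phase
        std::size_t n_;

    public:
        explicit tree_barrier_sketch(std::size_t n) : slots_(n), n_(n) {}

        void arrive_and_wait(std::size_t tid) {
            bool const phase = sense_.load(std::memory_order_relaxed);
            // Walk up the tree: at each level a "loser" reports to its partner
            // and drops out; a "winner" waits for its partner and moves up.
            for (std::size_t step = 1; step < n_; step *= 2) {
                if (tid % (2 * step) != 0) {
                    slots_[tid].arrived.store(true, std::memory_order_release);
                    break;                        // loser: signal, then go wait
                }
                if (tid + step < n_) {            // winner: collect the partner
                    while (!slots_[tid + step].arrived.load(std::memory_order_acquire))
                        ;                         // spin (real code would back off)
                    slots_[tid + step].arrived.store(false, std::memory_order_relaxed);
                }
            }
            if (tid == 0)                         // the root releases everyone
                sense_.store(!phase, std::memory_order_release);
            else
                while (sense_.load(std::memory_order_acquire) == phase)
                    ;                             // spin until the phase flips
        }
    };
*/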

using __barrier_phase_t = uint8_t;

class __barrier_algorithm_base;

_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);

_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
                                     __barrier_phase_t __old_phase);

_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);

template<class _CompletionF>
class __barrier_base {

    ptrdiff_t __expected;
    unique_ptr<__barrier_algorithm_base,
               decltype(&__destroy_barrier_algorithm_base)> __base;
    __atomic_base<ptrdiff_t> __expected_adjustment;
    _CompletionF __completion;
    __atomic_base<__barrier_phase_t> __phase;

public:
    using arrival_token = __barrier_phase_t;

    static constexpr ptrdiff_t max() noexcept {
        return numeric_limits<ptrdiff_t>::max();
    }

    _LIBCPP_INLINE_VISIBILITY
    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
        : __expected(__expected), __base(__construct_barrier_algorithm_base(this->__expected),
                                         &__destroy_barrier_algorithm_base),
          __expected_adjustment(0), __completion(move(__completion)), __phase(0)
    {
    }
    [[nodiscard]] _LIBCPP_INLINE_VISIBILITY
    arrival_token arrive(ptrdiff_t update)
    {
        auto const __old_phase = __phase.load(memory_order_relaxed);
        for(; update; --update)
            if(__arrive_barrier_algorithm_base(__base.get(), __old_phase)) {
                __completion();
                __expected += __expected_adjustment.load(memory_order_relaxed);
                __expected_adjustment.store(0, memory_order_relaxed);
                __phase.store(__old_phase + 2, memory_order_release);
                __phase.notify_all();
            }
        return __old_phase;
    }
    _LIBCPP_INLINE_VISIBILITY
    void wait(arrival_token&& __old_phase) const
    {
        auto const __test_fn = [=]() -> bool {
            return __phase.load(memory_order_acquire) != __old_phase;
        };
        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
    }
    _LIBCPP_INLINE_VISIBILITY
    void arrive_and_drop()
    {
        __expected_adjustment.fetch_sub(1, memory_order_relaxed);
        (void)arrive(1);
    }
};

#else

/*

The alternative implementation of __barrier_base is a central barrier.

Two versions of this algorithm are provided:
 1. A fairly straightforward implementation of the algorithm from the
    literature, for the general case where the completion function is
    not empty.
 2. An optimized implementation that exploits 2's complement arithmetic
    and well-defined overflow in atomic arithmetic to handle the phase
    roll-over for free.

*/
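
/*
Worked example for version 1: with three participating threads, __expected and
__arrived both start at 3. Each call to arrive(1) performs a fetch_sub on
__arrived, taking it from 3 to 2, then 1, then 0. The thread that brings it to
0 runs the completion function, reloads __expected (which arrive_and_drop may
have lowered in the meantime), stores that value back into __arrived for the
next phase, flips __phase, and notifies waiters. Every arriving thread returns
the phase it observed on entry, so wait() simply blocks until __phase differs
from that token.
*/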

template<class _CompletionF>
class __barrier_base {

    __atomic_base<ptrdiff_t> __expected;
    __atomic_base<ptrdiff_t> __arrived;
    _CompletionF __completion;
    __atomic_base<bool> __phase;
public:
    using arrival_token = bool;

    static constexpr ptrdiff_t max() noexcept {
        return numeric_limits<ptrdiff_t>::max();
    }

    _LIBCPP_INLINE_VISIBILITY
    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
        : __expected(__expected), __arrived(__expected), __completion(move(__completion)), __phase(false)
    {
    }
    [[nodiscard]] _LIBCPP_INLINE_VISIBILITY
    arrival_token arrive(ptrdiff_t update)
    {
        auto const __old_phase = __phase.load(memory_order_relaxed);
        auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
        auto const new_expected = __expected.load(memory_order_relaxed);
        if(0 == __result) {
            __completion();
            __arrived.store(new_expected, memory_order_relaxed);
            __phase.store(!__old_phase, memory_order_release);
            __phase.notify_all();
        }
        return __old_phase;
    }
    _LIBCPP_INLINE_VISIBILITY
    void wait(arrival_token&& __old_phase) const
    {
        __phase.wait(__old_phase, memory_order_acquire);
    }
    _LIBCPP_INLINE_VISIBILITY
    void arrive_and_drop()
    {
        __expected.fetch_sub(1, memory_order_relaxed);
        (void)arrive(1);
    }
};

template<>
class __barrier_base<__empty_completion> {

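    // All of the barrier state is packed into one 64-bit word: bit 63 is the
    // phase, bits 32-62 count arrivals, and bits 0-31 hold the expected count.
    // Both counts are stored biased as (1u << 31) - n, so the n-th arrival
    // (each arrival adds __arrived_unit) carries into bit 63 and flips the
    // phase as a side effect of a single fetch_add; the biased expected field
    // is then added back in to reseed the arrival field for the next phase.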
    static constexpr uint64_t __expected_unit = 1ull;
    static constexpr uint64_t __arrived_unit = 1ull << 32;
    static constexpr uint64_t __expected_mask = __arrived_unit - 1;
    static constexpr uint64_t __phase_bit = 1ull << 63;
    static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;

    __atomic_base<uint64_t> __phase_arrived_expected;

    static _LIBCPP_INLINE_VISIBILITY
    constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
    {
        return ((uint64_t(1u << 31) - __count) << 32)
              | (uint64_t(1u << 31) - __count);
    }

public:
    using arrival_token = uint64_t;

    static constexpr ptrdiff_t max() noexcept {
        return ptrdiff_t(1u << 31) - 1;
    }

    _LIBCPP_INLINE_VISIBILITY
    explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
        : __phase_arrived_expected(__init(__count))
    {
    }
    [[nodiscard]] inline _LIBCPP_INLINE_VISIBILITY
    arrival_token arrive(ptrdiff_t update)
    {
        auto const __inc = __arrived_unit * update;
        auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
        if((__old ^ (__old + __inc)) & __phase_bit) {
            __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
            __phase_arrived_expected.notify_all();
        }
        return __old & __phase_bit;
    }
    inline _LIBCPP_INLINE_VISIBILITY
    void wait(arrival_token&& __phase) const
    {
        auto const __test_fn = [=]() -> bool {
            uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
            return ((__current & __phase_bit) != __phase);
        };
        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
    }
    inline _LIBCPP_INLINE_VISIBILITY
    void arrive_and_drop()
    {
        __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
        (void)arrive(1);
    }
};

#endif //_LIBCPP_HAS_NO_TREE_BARRIER

template<class _CompletionF = __empty_completion>
class barrier {

    __barrier_base<_CompletionF> __b;
public:
    using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;

    static constexpr ptrdiff_t max() noexcept {
        return __barrier_base<_CompletionF>::max();
    }

    _LIBCPP_INLINE_VISIBILITY
    explicit barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
        : __b(__count, std::move(__completion)) {
    }

    barrier(barrier const&) = delete;
    barrier& operator=(barrier const&) = delete;

    [[nodiscard]] _LIBCPP_INLINE_VISIBILITY
    arrival_token arrive(ptrdiff_t update = 1)
    {
        return __b.arrive(update);
    }
    _LIBCPP_INLINE_VISIBILITY
    void wait(arrival_token&& __phase) const
    {
        __b.wait(std::move(__phase));
    }
    _LIBCPP_INLINE_VISIBILITY
    void arrive_and_wait()
    {
        wait(arrive());
    }
    _LIBCPP_INLINE_VISIBILITY
    void arrive_and_drop()
    {
        __b.arrive_and_drop();
    }
};

_LIBCPP_END_NAMESPACE_STD

#endif //_LIBCPP_BARRIER