blob: 5ef8b78e83c44db8e5e6b7112984cca87e5c9092 [file] [log] [blame]
Olivier Giroux161e6e82020-02-18 09:58:34 -05001// -*- C++ -*-
2//===--------------------------- barrier ----------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14 barrier synopsis
15
16namespace std
17{
18
19 template<class CompletionFunction = see below>
20 class barrier
21 {
22 public:
23 using arrival_token = see below;
24
25 constexpr explicit barrier(ptrdiff_t phase_count,
26 CompletionFunction f = CompletionFunction());
27 ~barrier();
28
29 barrier(const barrier&) = delete;
30 barrier& operator=(const barrier&) = delete;
31
32 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
33 void wait(arrival_token&& arrival) const;
34
35 void arrive_and_wait();
36 void arrive_and_drop();
37
38 private:
39 CompletionFunction completion; // exposition only
40 };
41
42}
43
44*/
45
46#include <__config>
47#include <atomic>
48#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
49# include <memory>
50#endif
51
52#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
53#pragma GCC system_header
54#endif
55
56#ifdef _LIBCPP_HAS_NO_THREADS
57# error <barrier> is not supported on this single threaded system
58#endif
59
Louis Dionne1ab95312020-02-24 18:12:44 -050060#if _LIBCPP_STD_VER >= 14
Olivier Giroux161e6e82020-02-18 09:58:34 -050061
62_LIBCPP_BEGIN_NAMESPACE_STD
63
64struct __empty_completion
65{
66 inline _LIBCPP_INLINE_VISIBILITY
67 void operator()() noexcept
68 {
69 }
70};
71
72#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
73
74/*
75
76The default implementation of __barrier_base is a classic tree barrier.
77
78It looks different from literature pseudocode for two main reasons:
79 1. Threads that call into std::barrier functions do not provide indices,
80 so a numbering step is added before the actual barrier algorithm,
81 appearing as an N+1 round to the N rounds of the tree barrier.
82 2. A great deal of attention has been paid to avoid cache line thrashing
83 by flattening the tree structure into cache-line sized arrays, that
84 are indexed in an efficient way.
85
86*/
87
88using __barrier_phase_t = uint8_t;
89
90class __barrier_algorithm_base;
91
Louis Dionne48a828b2020-02-24 10:08:41 -050092_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
Olivier Giroux161e6e82020-02-18 09:58:34 -050093__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
94
Louis Dionne48a828b2020-02-24 10:08:41 -050095_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
Olivier Giroux161e6e82020-02-18 09:58:34 -050096bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
97 __barrier_phase_t __old_phase);
98
Louis Dionne48a828b2020-02-24 10:08:41 -050099_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
Olivier Giroux161e6e82020-02-18 09:58:34 -0500100void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
101
102template<class _CompletionF>
103class __barrier_base {
104
105 ptrdiff_t __expected;
106 unique_ptr<__barrier_algorithm_base,
107 decltype(&__destroy_barrier_algorithm_base)> __base;
108 __atomic_base<ptrdiff_t> __expected_adjustment;
109 _CompletionF __completion;
110 __atomic_base<__barrier_phase_t> __phase;
111
112public:
113 using arrival_token = __barrier_phase_t;
114
115 static constexpr ptrdiff_t max() noexcept {
116 return numeric_limits<ptrdiff_t>::max();
117 }
118
119 _LIBCPP_INLINE_VISIBILITY
120 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
121 : __expected(__expected), __base(__construct_barrier_algorithm_base(this->__expected),
122 &__destroy_barrier_algorithm_base),
123 __expected_adjustment(0), __completion(move(__completion)), __phase(0)
124 {
125 }
126 [[nodiscard]] _LIBCPP_INLINE_VISIBILITY
127 arrival_token arrive(ptrdiff_t update)
128 {
129 auto const __old_phase = __phase.load(memory_order_relaxed);
130 for(; update; --update)
131 if(__arrive_barrier_algorithm_base(__base.get(), __old_phase)) {
132 __completion();
133 __expected += __expected_adjustment.load(memory_order_relaxed);
134 __expected_adjustment.store(0, memory_order_relaxed);
135 __phase.store(__old_phase + 2, memory_order_release);
136 __phase.notify_all();
137 }
138 return __old_phase;
139 }
140 _LIBCPP_INLINE_VISIBILITY
141 void wait(arrival_token&& __old_phase) const
142 {
143 auto const __test_fn = [=]() -> bool {
144 return __phase.load(memory_order_acquire) != __old_phase;
145 };
146 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
147 }
148 _LIBCPP_INLINE_VISIBILITY
149 void arrive_and_drop()
150 {
151 __expected_adjustment.fetch_sub(1, memory_order_relaxed);
152 (void)arrive(1);
153 }
154};
155
156#else
157
158/*
159
160The alternative implementation of __barrier_base is a central barrier.
161
162Two versions of this algorithm are provided:
  1. A fairly straightforward implementation of the literature for the
     general case where the completion function is not empty.
165 2. An optimized implementation that exploits 2's complement arithmetic
166 and well-defined overflow in atomic arithmetic, to handle the phase
167 roll-over for free.
168
169*/
170
171template<class _CompletionF>
172class __barrier_base {
173
174 __atomic_base<ptrdiff_t> __expected;
175 __atomic_base<ptrdiff_t> __arrived;
176 _CompletionF __completion;
177 __atomic_base<bool> __phase;
178public:
179 using arrival_token = bool;
180
181 static constexpr ptrdiff_t max() noexcept {
182 return numeric_limits<ptrdiff_t>::max();
183 }
184
185 _LIBCPP_INLINE_VISIBILITY
186 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
187 : __expected(__expected), __arrived(__expected), __completion(move(__completion)), __phase(false)
188 {
189 }
190 [[nodiscard]] _LIBCPP_INLINE_VISIBILITY
191 arrival_token arrive(ptrdiff_t update)
192 {
193 auto const __old_phase = __phase.load(memory_order_relaxed);
194 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
195 auto const new_expected = __expected.load(memory_order_relaxed);
196 if(0 == __result) {
197 __completion();
198 __arrived.store(new_expected, memory_order_relaxed);
199 __phase.store(!__old_phase, memory_order_release);
200 __phase.notify_all();
201 }
202 return __old_phase;
203 }
204 _LIBCPP_INLINE_VISIBILITY
205 void wait(arrival_token&& __old_phase) const
206 {
207 __phase.wait(__old_phase, memory_order_acquire);
208 }
209 _LIBCPP_INLINE_VISIBILITY
210 void arrive_and_drop()
211 {
212 __expected.fetch_sub(1, memory_order_relaxed);
213 (void)arrive(1);
214 }
215};
216
217template<>
218class __barrier_base<__empty_completion> {
219
220 static constexpr uint64_t __expected_unit = 1ull;
221 static constexpr uint64_t __arrived_unit = 1ull << 32;
222 static constexpr uint64_t __expected_mask = __arrived_unit - 1;
223 static constexpr uint64_t __phase_bit = 1ull << 63;
224 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
225
226 __atomic_base<uint64_t> __phase_arrived_expected;
227
228 static _LIBCPP_INLINE_VISIBILITY
229 constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
230 {
231 return ((uint64_t(1u << 31) - __count) << 32)
232 | (uint64_t(1u << 31) - __count);
233 }
234
235public:
236 using arrival_token = uint64_t;
237
238 static constexpr ptrdiff_t max() noexcept {
239 return ptrdiff_t(1u << 31) - 1;
240 }
241
242 _LIBCPP_INLINE_VISIBILITY
243 explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
244 : __phase_arrived_expected(__init(__count))
245 {
246 }
247 [[nodiscard]] inline _LIBCPP_INLINE_VISIBILITY
248 arrival_token arrive(ptrdiff_t update)
249 {
250 auto const __inc = __arrived_unit * update;
251 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
252 if((__old ^ (__old + __inc)) & __phase_bit) {
253 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
254 __phase_arrived_expected.notify_all();
255 }
256 return __old & __phase_bit;
257 }
258 inline _LIBCPP_INLINE_VISIBILITY
259 void wait(arrival_token&& __phase) const
260 {
261 auto const __test_fn = [=]() -> bool {
262 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
263 return ((__current & __phase_bit) != __phase);
264 };
265 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
266 }
267 inline _LIBCPP_INLINE_VISIBILITY
268 void arrive_and_drop()
269 {
270 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
271 (void)arrive(1);
272 }
273};
274
275#endif //_LIBCPP_HAS_NO_TREE_BARRIER
276
277template<class _CompletionF = __empty_completion>
278class barrier {
279
280 __barrier_base<_CompletionF> __b;
281public:
282 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
283
284 static constexpr ptrdiff_t max() noexcept {
285 return __barrier_base<_CompletionF>::max();
286 }
287
288 _LIBCPP_INLINE_VISIBILITY
289 barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
290 : __b(__count, std::move(__completion)) {
291 }
292
293 barrier(barrier const&) = delete;
294 barrier& operator=(barrier const&) = delete;
295
296 [[nodiscard]] _LIBCPP_INLINE_VISIBILITY
297 arrival_token arrive(ptrdiff_t update = 1)
298 {
299 return __b.arrive(update);
300 }
301 _LIBCPP_INLINE_VISIBILITY
302 void wait(arrival_token&& __phase) const
303 {
304 __b.wait(std::move(__phase));
305 }
306 _LIBCPP_INLINE_VISIBILITY
307 void arrive_and_wait()
308 {
309 wait(arrive());
310 }
311 _LIBCPP_INLINE_VISIBILITY
312 void arrive_and_drop()
313 {
314 __b.arrive_and_drop();
315 }
316};
317
318_LIBCPP_END_NAMESPACE_STD
319
Louis Dionne1ab95312020-02-24 18:12:44 -0500320#endif // _LIBCPP_STD_VER >= 14
321
Olivier Giroux161e6e82020-02-18 09:58:34 -0500322#endif //_LIBCPP_BARRIER