blob: be213a6895ef77481aae9da2ce499fb5ff36a961 [file] [log] [blame]
Olivier Giroux161e6e82020-02-18 09:58:34 -05001// -*- C++ -*-
2//===--------------------------- barrier ----------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14 barrier synopsis
15
16namespace std
17{
18
19 template<class CompletionFunction = see below>
20 class barrier
21 {
22 public:
23 using arrival_token = see below;
24
Marek Kurdej7f401b92020-12-06 15:36:18 +010025 static constexpr ptrdiff_t max() noexcept;
26
Olivier Giroux161e6e82020-02-18 09:58:34 -050027 constexpr explicit barrier(ptrdiff_t phase_count,
28 CompletionFunction f = CompletionFunction());
29 ~barrier();
30
31 barrier(const barrier&) = delete;
32 barrier& operator=(const barrier&) = delete;
33
34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35 void wait(arrival_token&& arrival) const;
36
37 void arrive_and_wait();
38 void arrive_and_drop();
39
40 private:
41 CompletionFunction completion; // exposition only
42 };
43
44}
45
46*/
47
48#include <__config>
Louis Dionne73912b22020-11-04 15:01:25 -050049#include <__availability>
Olivier Giroux161e6e82020-02-18 09:58:34 -050050#include <atomic>
51#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
52# include <memory>
53#endif
54
55#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
56#pragma GCC system_header
57#endif
58
59#ifdef _LIBCPP_HAS_NO_THREADS
60# error <barrier> is not supported on this single threaded system
61#endif
62
Arthur O'Dwyer2422fa22020-12-02 18:55:01 -050063_LIBCPP_PUSH_MACROS
64#include <__undef_macros>
65
Louis Dionne1ab95312020-02-24 18:12:44 -050066#if _LIBCPP_STD_VER >= 14
Olivier Giroux161e6e82020-02-18 09:58:34 -050067
68_LIBCPP_BEGIN_NAMESPACE_STD
69
70struct __empty_completion
71{
72 inline _LIBCPP_INLINE_VISIBILITY
73 void operator()() noexcept
74 {
75 }
76};
77
78#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
79
80/*
81
82The default implementation of __barrier_base is a classic tree barrier.
83
84It looks different from literature pseudocode for two main reasons:
85 1. Threads that call into std::barrier functions do not provide indices,
86 so a numbering step is added before the actual barrier algorithm,
87 appearing as an N+1 round to the N rounds of the tree barrier.
88 2. A great deal of attention has been paid to avoid cache line thrashing
89 by flattening the tree structure into cache-line sized arrays, that
90 are indexed in an efficient way.
91
92*/
93
94using __barrier_phase_t = uint8_t;
95
96class __barrier_algorithm_base;
97
Louis Dionne48a828b2020-02-24 10:08:41 -050098_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
Olivier Giroux161e6e82020-02-18 09:58:34 -050099__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
100
Louis Dionne48a828b2020-02-24 10:08:41 -0500101_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
Olivier Giroux161e6e82020-02-18 09:58:34 -0500102bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
103 __barrier_phase_t __old_phase);
104
Louis Dionne48a828b2020-02-24 10:08:41 -0500105_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
Olivier Giroux161e6e82020-02-18 09:58:34 -0500106void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
107
108template<class _CompletionF>
109class __barrier_base {
110
111 ptrdiff_t __expected;
112 unique_ptr<__barrier_algorithm_base,
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500113 void (*)(__barrier_algorithm_base*)> __base;
Olivier Giroux161e6e82020-02-18 09:58:34 -0500114 __atomic_base<ptrdiff_t> __expected_adjustment;
115 _CompletionF __completion;
116 __atomic_base<__barrier_phase_t> __phase;
117
118public:
119 using arrival_token = __barrier_phase_t;
120
121 static constexpr ptrdiff_t max() noexcept {
122 return numeric_limits<ptrdiff_t>::max();
123 }
124
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500125 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500126 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
127 : __expected(__expected), __base(__construct_barrier_algorithm_base(this->__expected),
128 &__destroy_barrier_algorithm_base),
129 __expected_adjustment(0), __completion(move(__completion)), __phase(0)
130 {
131 }
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500132 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500133 arrival_token arrive(ptrdiff_t update)
134 {
135 auto const __old_phase = __phase.load(memory_order_relaxed);
136 for(; update; --update)
137 if(__arrive_barrier_algorithm_base(__base.get(), __old_phase)) {
138 __completion();
139 __expected += __expected_adjustment.load(memory_order_relaxed);
140 __expected_adjustment.store(0, memory_order_relaxed);
141 __phase.store(__old_phase + 2, memory_order_release);
142 __phase.notify_all();
143 }
144 return __old_phase;
145 }
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500146 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500147 void wait(arrival_token&& __old_phase) const
148 {
149 auto const __test_fn = [=]() -> bool {
150 return __phase.load(memory_order_acquire) != __old_phase;
151 };
152 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
153 }
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500154 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500155 void arrive_and_drop()
156 {
157 __expected_adjustment.fetch_sub(1, memory_order_relaxed);
158 (void)arrive(1);
159 }
160};
161
162#else
163
164/*
165
166The alternative implementation of __barrier_base is a central barrier.
167
168Two versions of this algorithm are provided:
169 1. A fairly straightforward implementation of the litterature for the
170 general case where the completion function is not empty.
171 2. An optimized implementation that exploits 2's complement arithmetic
172 and well-defined overflow in atomic arithmetic, to handle the phase
173 roll-over for free.
174
175*/
176
177template<class _CompletionF>
178class __barrier_base {
179
180 __atomic_base<ptrdiff_t> __expected;
181 __atomic_base<ptrdiff_t> __arrived;
182 _CompletionF __completion;
183 __atomic_base<bool> __phase;
184public:
185 using arrival_token = bool;
186
187 static constexpr ptrdiff_t max() noexcept {
188 return numeric_limits<ptrdiff_t>::max();
189 }
190
191 _LIBCPP_INLINE_VISIBILITY
192 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
193 : __expected(__expected), __arrived(__expected), __completion(move(__completion)), __phase(false)
194 {
195 }
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500196 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500197 arrival_token arrive(ptrdiff_t update)
198 {
199 auto const __old_phase = __phase.load(memory_order_relaxed);
200 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
201 auto const new_expected = __expected.load(memory_order_relaxed);
202 if(0 == __result) {
203 __completion();
204 __arrived.store(new_expected, memory_order_relaxed);
205 __phase.store(!__old_phase, memory_order_release);
206 __phase.notify_all();
207 }
208 return __old_phase;
209 }
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500210 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500211 void wait(arrival_token&& __old_phase) const
212 {
213 __phase.wait(__old_phase, memory_order_acquire);
214 }
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500215 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500216 void arrive_and_drop()
217 {
218 __expected.fetch_sub(1, memory_order_relaxed);
219 (void)arrive(1);
220 }
221};
222
223template<>
224class __barrier_base<__empty_completion> {
225
226 static constexpr uint64_t __expected_unit = 1ull;
227 static constexpr uint64_t __arrived_unit = 1ull << 32;
228 static constexpr uint64_t __expected_mask = __arrived_unit - 1;
229 static constexpr uint64_t __phase_bit = 1ull << 63;
230 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
231
232 __atomic_base<uint64_t> __phase_arrived_expected;
233
234 static _LIBCPP_INLINE_VISIBILITY
235 constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
236 {
237 return ((uint64_t(1u << 31) - __count) << 32)
238 | (uint64_t(1u << 31) - __count);
239 }
240
241public:
242 using arrival_token = uint64_t;
243
244 static constexpr ptrdiff_t max() noexcept {
245 return ptrdiff_t(1u << 31) - 1;
246 }
247
248 _LIBCPP_INLINE_VISIBILITY
249 explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
250 : __phase_arrived_expected(__init(__count))
251 {
252 }
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500253 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500254 arrival_token arrive(ptrdiff_t update)
255 {
256 auto const __inc = __arrived_unit * update;
257 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
258 if((__old ^ (__old + __inc)) & __phase_bit) {
259 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
260 __phase_arrived_expected.notify_all();
261 }
262 return __old & __phase_bit;
263 }
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500264 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500265 void wait(arrival_token&& __phase) const
266 {
267 auto const __test_fn = [=]() -> bool {
268 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
269 return ((__current & __phase_bit) != __phase);
270 };
271 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
272 }
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500273 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500274 void arrive_and_drop()
275 {
276 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
277 (void)arrive(1);
278 }
279};
280
281#endif //_LIBCPP_HAS_NO_TREE_BARRIER
282
283template<class _CompletionF = __empty_completion>
284class barrier {
285
286 __barrier_base<_CompletionF> __b;
287public:
288 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
289
290 static constexpr ptrdiff_t max() noexcept {
291 return __barrier_base<_CompletionF>::max();
292 }
293
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500294 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500295 barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
Arthur O'Dwyer07b22492020-11-27 11:02:06 -0500296 : __b(__count, _VSTD::move(__completion)) {
Olivier Giroux161e6e82020-02-18 09:58:34 -0500297 }
298
299 barrier(barrier const&) = delete;
300 barrier& operator=(barrier const&) = delete;
301
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500302 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500303 arrival_token arrive(ptrdiff_t update = 1)
304 {
305 return __b.arrive(update);
306 }
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500307 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500308 void wait(arrival_token&& __phase) const
309 {
Arthur O'Dwyer07b22492020-11-27 11:02:06 -0500310 __b.wait(_VSTD::move(__phase));
Olivier Giroux161e6e82020-02-18 09:58:34 -0500311 }
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500312 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500313 void arrive_and_wait()
314 {
315 wait(arrive());
316 }
Louis Dionne3dde4ab2020-02-24 10:09:29 -0500317 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
Olivier Giroux161e6e82020-02-18 09:58:34 -0500318 void arrive_and_drop()
319 {
320 __b.arrive_and_drop();
321 }
322};
323
324_LIBCPP_END_NAMESPACE_STD
325
Louis Dionne1ab95312020-02-24 18:12:44 -0500326#endif // _LIBCPP_STD_VER >= 14
327
Arthur O'Dwyer2422fa22020-12-02 18:55:01 -0500328_LIBCPP_POP_MACROS
329
Olivier Giroux161e6e82020-02-18 09:58:34 -0500330#endif //_LIBCPP_BARRIER