blob: a0df8217068c966904fec655bda1cb0d5f7f691e [file] [log] [blame]
Matthew Wilcoxf6bb2a22018-04-10 16:36:52 -07001/* SPDX-License-Identifier: GPL-2.0+ */
2#ifndef _LINUX_XARRAY_H
3#define _LINUX_XARRAY_H
4/*
5 * eXtensible Arrays
6 * Copyright (c) 2017 Microsoft Corporation
Matthew Wilcox3d0186b2018-06-16 17:32:07 -04007 * Author: Matthew Wilcox <willy@infradead.org>
Matthew Wilcox3159f942017-11-03 13:30:42 -04008 *
9 * See Documentation/core-api/xarray.rst for how to use the XArray.
Matthew Wilcoxf6bb2a22018-04-10 16:36:52 -070010 */
11
Matthew Wilcox3159f942017-11-03 13:30:42 -040012#include <linux/bug.h>
Matthew Wilcoxf8d5d0c2017-11-07 16:30:10 -050013#include <linux/compiler.h>
14#include <linux/kconfig.h>
Matthew Wilcoxad3d6c72017-11-07 14:57:46 -050015#include <linux/kernel.h>
16#include <linux/rcupdate.h>
Matthew Wilcoxf6bb2a22018-04-10 16:36:52 -070017#include <linux/spinlock.h>
Matthew Wilcox3159f942017-11-03 13:30:42 -040018#include <linux/types.h>
19
/*
 * The bottom two bits of the entry determine how the XArray interprets
 * the contents:
 *
 * 00: Pointer entry
 * 10: Internal entry
 * x1: Value entry or tagged pointer
 *
 * Attempting to store internal entries in the XArray is a bug.
 *
 * Most internal entries are pointers to the next node in the tree.
 * The following internal entries have a special meaning:
 *
 * 0-62: Sibling entries
 * 256: Retry entry
 *
 * Errors are also represented as internal entries, but use the negative
 * space (-4094 to -2).  They're never stored in the slots array; only
 * returned by the normal API.
 */
40
/* One bit of each word is used to mark the entry as a value entry. */
#define BITS_PER_XA_VALUE	(BITS_PER_LONG - 1)
42
/**
 * xa_mk_value() - Create an XArray entry from an integer.
 * @v: Value to store in XArray.
 *
 * The value is shifted up by one bit and the bottom bit is set.  A
 * warning is issued if the top bit of @v is set, because that bit is
 * lost by the shift.
 *
 * Context: Any context.
 * Return: An entry suitable for storing in the XArray.
 */
static inline void *xa_mk_value(unsigned long v)
{
	unsigned long encoded = (v << 1) | 1;

	WARN_ON((long)v < 0);
	return (void *)encoded;
}
55
/**
 * xa_to_value() - Get value stored in an XArray entry.
 * @entry: XArray entry.
 *
 * Undoes the encoding applied by xa_mk_value() by discarding the
 * bottom bit of the entry.
 *
 * Context: Any context.
 * Return: The value stored in the XArray entry.
 */
static inline unsigned long xa_to_value(const void *entry)
{
	unsigned long raw = (unsigned long)entry;

	return raw >> 1;
}
67
/**
 * xa_is_value() - Determine if an entry is a value.
 * @entry: XArray entry.
 *
 * An entry is treated as a value when its bottom bit is set.
 *
 * Context: Any context.
 * Return: True if the entry is a value, false if it is a pointer.
 */
static inline bool xa_is_value(const void *entry)
{
	return ((unsigned long)entry & 1) != 0;
}
79
/**
 * xa_tag_pointer() - Create an XArray entry for a tagged pointer.
 * @p: Plain pointer.
 * @tag: Tag value (0, 1 or 3).
 *
 * If the user of the XArray prefers, they can tag their pointers instead
 * of storing value entries.  Three tags are available (0, 1 and 3).
 * These are distinct from the xa_mark_t as they are not replicated up
 * through the array and cannot be searched for.
 *
 * The tag is OR-ed into the pointer without any checking of @tag.
 *
 * Context: Any context.
 * Return: An XArray entry.
 */
static inline void *xa_tag_pointer(void *p, unsigned long tag)
{
	unsigned long addr = (unsigned long)p;

	return (void *)(addr | tag);
}
97
/**
 * xa_untag_pointer() - Turn an XArray entry into a plain pointer.
 * @entry: XArray entry.
 *
 * If you have stored a tagged pointer in the XArray, call this function
 * to get the untagged version of the pointer.  The bottom two bits of
 * the entry are cleared.
 *
 * Context: Any context.
 * Return: A pointer.
 */
static inline void *xa_untag_pointer(void *entry)
{
	unsigned long addr = (unsigned long)entry;

	return (void *)(addr & ~3UL);
}
112
/**
 * xa_pointer_tag() - Get the tag stored in an XArray entry.
 * @entry: XArray entry.
 *
 * If you have stored a tagged pointer in the XArray, call this function
 * to get the tag of that pointer.  The tag is the bottom two bits of
 * the entry.
 *
 * Context: Any context.
 * Return: A tag.
 */
static inline unsigned int xa_pointer_tag(void *entry)
{
	unsigned long addr = (unsigned long)entry;

	return addr & 3UL;
}
Matthew Wilcoxf6bb2a22018-04-10 16:36:52 -0700127
/*
 * xa_mk_internal() - Create an internal entry.
 * @v: Value to turn into an internal entry.
 *
 * The value is shifted up by two bits and the bottom two bits are set
 * to binary 10, the pattern tested by xa_is_internal().
 *
 * Context: Any context.
 * Return: An XArray internal entry corresponding to this value.
 */
static inline void *xa_mk_internal(unsigned long v)
{
	unsigned long encoded = (v << 2) | 2;

	return (void *)encoded;
}
139
/*
 * xa_to_internal() - Extract the value from an internal entry.
 * @entry: XArray entry.
 *
 * The shift is unsigned, so the top bits of the result are zero-filled.
 *
 * Context: Any context.
 * Return: The value which was stored in the internal entry.
 */
static inline unsigned long xa_to_internal(const void *entry)
{
	unsigned long raw = (unsigned long)entry;

	return raw >> 2;
}
151
/*
 * xa_is_internal() - Is the entry an internal entry?
 * @entry: XArray entry.
 *
 * An internal entry has its bottom two bits set to binary 10.
 *
 * Context: Any context.
 * Return: %true if the entry is an internal entry.
 */
static inline bool xa_is_internal(const void *entry)
{
	unsigned long low_bits = (unsigned long)entry & 3;

	return low_bits == 2;
}
163
/**
 * xa_is_err() - Report whether an XArray operation returned an error
 * @entry: Result from calling an XArray function
 *
 * If an XArray operation cannot complete an operation, it will return
 * a special value indicating an error.  This function tells you
 * whether an error occurred; xa_err() tells you which error occurred.
 *
 * Any internal entry is reported as an error; the check is hinted to
 * the compiler as unlikely to succeed.
 *
 * Context: Any context.
 * Return: %true if the entry indicates an error.
 */
static inline bool xa_is_err(const void *entry)
{
	bool internal = xa_is_internal(entry);

	return unlikely(internal);
}
179
/**
 * xa_err() - Turn an XArray result into an errno.
 * @entry: Result from calling an XArray function.
 *
 * If an XArray operation cannot complete an operation, it will return
 * a special pointer value which encodes an errno.  This function extracts
 * the errno from the pointer value, or returns 0 if the pointer does not
 * represent an errno.
 *
 * Context: Any context.
 * Return: A negative errno or 0.
 */
static inline int xa_err(void *entry)
{
	if (!xa_is_err(entry))
		return 0;

	/*
	 * Shift as a signed long to keep the sign of the errno;
	 * xa_to_internal() would not do sign extension.
	 */
	return (long)entry >> 2;
}
199
200/**
Matthew Wilcoxf8d5d0c2017-11-07 16:30:10 -0500201 * struct xarray - The anchor of the XArray.
202 * @xa_lock: Lock that protects the contents of the XArray.
203 *
204 * To use the xarray, define it statically or embed it in your data structure.
205 * It is a very small data structure, so it does not usually make sense to
206 * allocate it separately and keep a pointer to it in your data structure.
207 *
208 * You may use the xa_lock to protect your own data structures as well.
209 */
210/*
211 * If all of the entries in the array are NULL, @xa_head is a NULL pointer.
212 * If the only non-NULL entry in the array is at index 0, @xa_head is that
213 * entry. If any other entry in the array is non-NULL, @xa_head points
214 * to an @xa_node.
215 */
216struct xarray {
217 spinlock_t xa_lock;
218/* private: The rest of the data structure is not to be used directly. */
219 gfp_t xa_flags;
220 void __rcu * xa_head;
221};
222
/*
 * Static initialiser for a struct xarray called @name: the lock starts
 * unlocked, the flags are set to @flags and the array is empty (NULL head).
 */
#define XARRAY_INIT(name, flags) {				\
	.xa_lock = __SPIN_LOCK_UNLOCKED(name.xa_lock),		\
	.xa_flags = flags,					\
	.xa_head = NULL,					\
}
228
/**
 * DEFINE_XARRAY_FLAGS() - Define an XArray with custom flags.
 * @name: Identifier to use as the name of your XArray variable.
 * @flags: XA_FLAG values.
 *
 * This is intended for file scope definitions of XArrays.  It declares
 * and initialises an empty XArray with the chosen name and flags.  It is
 * equivalent to calling xa_init_flags() on the array, but it does the
 * initialisation at compile time instead of at run time.
 */
#define DEFINE_XARRAY_FLAGS(name, flags)			\
	struct xarray name = XARRAY_INIT(name, flags)
241
/**
 * DEFINE_XARRAY() - Define an XArray.
 * @name: Identifier to use as the name of your XArray variable.
 *
 * This is intended for file scope definitions of XArrays.  It declares
 * and initialises an empty XArray with the chosen name and a flags value
 * of 0.  It is equivalent to calling xa_init() on the array, but it does
 * the initialisation at compile time instead of at run time.
 */
#define DEFINE_XARRAY(name)	DEFINE_XARRAY_FLAGS(name, 0)
252
253void xa_init_flags(struct xarray *, gfp_t flags);
Matthew Wilcoxad3d6c72017-11-07 14:57:46 -0500254void *xa_load(struct xarray *, unsigned long index);
Matthew Wilcoxf8d5d0c2017-11-07 16:30:10 -0500255
/**
 * xa_init() - Initialise an empty XArray.
 * @xa: XArray.
 *
 * An empty XArray is full of NULL entries.  This is shorthand for
 * xa_init_flags() with a flags value of 0.
 *
 * Context: Any context.
 */
static inline void xa_init(struct xarray *xa)
{
	xa_init_flags(xa, 0);
}
268
Matthew Wilcoxad3d6c72017-11-07 14:57:46 -0500269/**
270 * xa_empty() - Determine if an array has any present entries.
271 * @xa: XArray.
272 *
273 * Context: Any context.
274 * Return: %true if the array contains only NULL pointers.
275 */
276static inline bool xa_empty(const struct xarray *xa)
277{
278 return xa->xa_head == NULL;
279}
280
/*
 * Locking helpers: each one applies the spinlock operation of the same
 * suffix to the xa_lock embedded in the array.
 */
#define xa_trylock(xa)		spin_trylock(&(xa)->xa_lock)
#define xa_lock(xa)		spin_lock(&(xa)->xa_lock)
#define xa_unlock(xa)		spin_unlock(&(xa)->xa_lock)
#define xa_lock_bh(xa)		spin_lock_bh(&(xa)->xa_lock)
#define xa_unlock_bh(xa)	spin_unlock_bh(&(xa)->xa_lock)
#define xa_lock_irq(xa)		spin_lock_irq(&(xa)->xa_lock)
#define xa_unlock_irq(xa)	spin_unlock_irq(&(xa)->xa_lock)
#define xa_lock_irqsave(xa, flags) \
				spin_lock_irqsave(&(xa)->xa_lock, flags)
#define xa_unlock_irqrestore(xa, flags) \
				spin_unlock_irqrestore(&(xa)->xa_lock, flags)
292
/* Everything below here is the Advanced API.  Proceed with caution. */
294
/*
 * The xarray is constructed out of a set of 'chunks' of pointers.  Choosing
 * the best chunk size requires some tradeoffs.  A power of two recommends
 * itself so that we can walk the tree based purely on shifts and masks.
 * Generally, the larger the better; as the number of slots per level of the
 * tree increases, the less tall the tree needs to be.  But that needs to be
 * balanced against the memory consumption of each node.  On a 64-bit system,
 * xa_node is currently 576 bytes, and we get 7 of them per 4kB page.  If we
 * doubled the number of slots per node, we'd get only 3 nodes per 4kB page.
 *
 * XA_CHUNK_SHIFT may be predefined by the includer; otherwise it is 4 when
 * CONFIG_BASE_SMALL is set and 6 when it is not.
 */
#ifndef XA_CHUNK_SHIFT
#define XA_CHUNK_SHIFT		(CONFIG_BASE_SMALL ? 4 : 6)
#endif
#define XA_CHUNK_SIZE		(1UL << XA_CHUNK_SHIFT)
#define XA_CHUNK_MASK		(XA_CHUNK_SIZE - 1)
#define XA_MAX_MARKS		3
#define XA_MARK_LONGS		DIV_ROUND_UP(XA_CHUNK_SIZE, BITS_PER_LONG)
312
313/*
314 * @count is the count of every non-NULL element in the ->slots array
315 * whether that is a value entry, a retry entry, a user pointer,
316 * a sibling entry or a pointer to the next level of the tree.
317 * @nr_values is the count of every element in ->slots which is
318 * either a value entry or a sibling of a value entry.
319 */
320struct xa_node {
321 unsigned char shift; /* Bits remaining in each slot */
322 unsigned char offset; /* Slot offset in parent */
323 unsigned char count; /* Total entry count */
324 unsigned char nr_values; /* Value entry count */
325 struct xa_node __rcu *parent; /* NULL at top of tree */
326 struct xarray *array; /* The array we belong to */
327 union {
328 struct list_head private_list; /* For tree user */
329 struct rcu_head rcu_head; /* Used when freeing node */
330 };
331 void __rcu *slots[XA_CHUNK_SIZE];
332 union {
333 unsigned long tags[XA_MAX_MARKS][XA_MARK_LONGS];
334 unsigned long marks[XA_MAX_MARKS][XA_MARK_LONGS];
335 };
336};
Matthew Wilcox02c02bf2017-11-03 23:09:45 -0400337
/* Dump helpers used by the XA_BUG_ON() family of debugging macros. */
void xa_dump(const struct xarray *xa);
void xa_dump_node(const struct xa_node *node);
340
/*
 * Debugging assertions.  With XA_DEBUG defined, a true condition dumps
 * the array (or the node, when it is non-NULL) and then calls BUG().
 * Without XA_DEBUG both macros expand to nothing, so the condition is
 * not evaluated at all.
 */
#ifdef XA_DEBUG
#define XA_BUG_ON(xa, x) do {					\
		if (x) {					\
			xa_dump(xa);				\
			BUG();					\
		}						\
	} while (0)
#define XA_NODE_BUG_ON(node, x) do {				\
		if (x) {					\
			if (node) xa_dump_node(node);		\
			BUG();					\
		}						\
	} while (0)
#else
#define XA_BUG_ON(xa, x)	do { } while (0)
#define XA_NODE_BUG_ON(node, x)	do { } while (0)
#endif
358
359/* Private */
360static inline void *xa_head(const struct xarray *xa)
361{
362 return rcu_dereference_check(xa->xa_head,
363 lockdep_is_held(&xa->xa_lock));
364}
365
366/* Private */
367static inline void *xa_head_locked(const struct xarray *xa)
368{
369 return rcu_dereference_protected(xa->xa_head,
370 lockdep_is_held(&xa->xa_lock));
371}
372
373/* Private */
374static inline void *xa_entry(const struct xarray *xa,
375 const struct xa_node *node, unsigned int offset)
376{
377 XA_NODE_BUG_ON(node, offset >= XA_CHUNK_SIZE);
378 return rcu_dereference_check(node->slots[offset],
379 lockdep_is_held(&xa->xa_lock));
380}
381
382/* Private */
383static inline void *xa_entry_locked(const struct xarray *xa,
384 const struct xa_node *node, unsigned int offset)
385{
386 XA_NODE_BUG_ON(node, offset >= XA_CHUNK_SIZE);
387 return rcu_dereference_protected(node->slots[offset],
388 lockdep_is_held(&xa->xa_lock));
389}
390
/*
 * Private: convert an internal entry into the node pointer it encodes
 * by subtracting the internal-entry marker (2) from the address.
 */
static inline struct xa_node *xa_to_node(const void *entry)
{
	unsigned long addr = (unsigned long)entry - 2;

	return (struct xa_node *)addr;
}
396
/*
 * Private: an entry is treated as a node pointer when it is an internal
 * entry whose raw value is greater than 4096.
 */
static inline bool xa_is_node(const void *entry)
{
	if (!xa_is_internal(entry))
		return false;
	return (unsigned long)entry > 4096;
}
402
/* Private: encode slot @offset as an internal (sibling) entry. */
static inline void *xa_mk_sibling(unsigned int offset)
{
	unsigned long slot = offset;

	return xa_mk_internal(slot);
}
408
/* Private: recover the slot offset stored in a sibling entry. */
static inline unsigned long xa_to_sibling(const void *entry)
{
	unsigned long slot = xa_to_internal(entry);

	return slot;
}
414
415/**
416 * xa_is_sibling() - Is the entry a sibling entry?
417 * @entry: Entry retrieved from the XArray
418 *
419 * Return: %true if the entry is a sibling entry.
420 */
421static inline bool xa_is_sibling(const void *entry)
422{
423 return IS_ENABLED(CONFIG_XARRAY_MULTI) && xa_is_internal(entry) &&
424 (entry < xa_mk_sibling(XA_CHUNK_SIZE - 1));
425}
426
/* The retry entry is the internal entry with value 256. */
#define XA_RETRY_ENTRY		xa_mk_internal(256)
428
Matthew Wilcoxad3d6c72017-11-07 14:57:46 -0500429/**
430 * xa_is_retry() - Is the entry a retry entry?
431 * @entry: Entry retrieved from the XArray
432 *
433 * Return: %true if the entry is a retry entry.
434 */
435static inline bool xa_is_retry(const void *entry)
436{
437 return unlikely(entry == XA_RETRY_ENTRY);
438}
439
/**
 * typedef xa_update_node_t - A callback function from the XArray.
 * @node: The node which is being processed
 *
 * This function is called every time the XArray updates the count of
 * present and value entries in a node.  It allows advanced users to
 * maintain the private_list in the node.
 *
 * Context: The xa_lock is held and interrupts may be disabled.
 *	    Implementations should not drop the xa_lock, nor re-enable
 *	    interrupts.
 */
typedef void (*xa_update_node_t)(struct xa_node *node);
453
454/*
455 * The xa_state is opaque to its users. It contains various different pieces
456 * of state involved in the current operation on the XArray. It should be
457 * declared on the stack and passed between the various internal routines.
458 * The various elements in it should not be accessed directly, but only
459 * through the provided accessor functions. The below documentation is for
460 * the benefit of those working on the code, not for users of the XArray.
461 *
462 * @xa_node usually points to the xa_node containing the slot we're operating
463 * on (and @xa_offset is the offset in the slots array). If there is a
464 * single entry in the array at index 0, there are no allocated xa_nodes to
465 * point to, and so we store %NULL in @xa_node. @xa_node is set to
466 * the value %XAS_RESTART if the xa_state is not walked to the correct
467 * position in the tree of nodes for this operation. If an error occurs
468 * during an operation, it is set to an %XAS_ERROR value. If we run off the
469 * end of the allocated nodes, it is set to %XAS_BOUNDS.
470 */
471struct xa_state {
472 struct xarray *xa;
473 unsigned long xa_index;
474 unsigned char xa_shift;
475 unsigned char xa_sibs;
476 unsigned char xa_offset;
477 unsigned char xa_pad; /* Helps gcc generate better code */
478 struct xa_node *xa_node;
479 struct xa_node *xa_alloc;
480 xa_update_node_t xa_update;
481};
482
/*
 * We encode errnos in the xas->xa_node.  If an error has happened, we need to
 * drop the lock to fix it, and once we've done so the xa_state is invalid.
 *
 * XA_ERROR() shifts the errno up by two bits and sets the bottom two bits
 * to binary 10, so the result has the shape of an internal entry.  The
 * argument is parenthesised so that an expression such as "a - b" is
 * converted and shifted as a whole.
 *
 * XAS_BOUNDS and XAS_RESTART have the bottom bit set, which makes
 * xas_invalid() true for them while xa_is_internal() stays false.
 */
#define XA_ERROR(err) \
	((struct xa_node *)(((unsigned long)(err) << 2) | 2UL))
#define XAS_BOUNDS	((struct xa_node *)1UL)
#define XAS_RESTART	((struct xa_node *)3UL)
490
/*
 * Initialiser for a struct xa_state.  The caller supplies the array, index,
 * shift and sibling count; the state starts at %XAS_RESTART with offset 0,
 * no preallocated node and no update callback.
 */
#define __XA_STATE(array, index, shift, sibs)  {	\
	.xa		= array,			\
	.xa_index	= index,			\
	.xa_shift	= shift,			\
	.xa_sibs	= sibs,				\
	.xa_offset	= 0,				\
	.xa_pad		= 0,				\
	.xa_node	= XAS_RESTART,			\
	.xa_alloc	= NULL,				\
	.xa_update	= NULL				\
}
502
/**
 * XA_STATE() - Declare an XArray operation state.
 * @name: Name of this operation state (usually xas).
 * @array: Array to operate on.
 * @index: Initial index of interest.
 *
 * Declare and initialise an xa_state on the stack, with a shift and a
 * sibling count of 0.
 */
#define XA_STATE(name, array, index)				\
	struct xa_state name = __XA_STATE(array, index, 0, 0)
513
/**
 * XA_STATE_ORDER() - Declare an XArray operation state.
 * @name: Name of this operation state (usually xas).
 * @array: Array to operate on.
 * @index: Initial index of interest.
 * @order: Order of entry.
 *
 * Declare and initialise an xa_state on the stack.  This variant of
 * XA_STATE() allows you to specify the 'order' of the element you
 * want to operate on.
 *
 * The index is rounded down to a multiple of 1 << @order.  The shift is
 * @order rounded down to a multiple of XA_CHUNK_SHIFT, and the sibling
 * count is (1 << (@order % XA_CHUNK_SHIFT)) - 1.
 *
 * @index and @order are parenthesised so expression arguments are
 * handled correctly.  Both are evaluated more than once, so they must
 * not have side effects.
 */
#define XA_STATE_ORDER(name, array, index, order)		\
	struct xa_state name = __XA_STATE(array,		\
			(((index) >> (order)) << (order)),	\
			(order) - ((order) % XA_CHUNK_SHIFT),	\
			(1U << ((order) % XA_CHUNK_SHIFT)) - 1)
530
/*
 * Convenience wrappers that apply the matching xa_*() operation to the
 * array referenced by an xa_state.
 */
#define xas_marked(xas, mark)	xa_marked((xas)->xa, (mark))
#define xas_trylock(xas)	xa_trylock((xas)->xa)
#define xas_lock(xas)		xa_lock((xas)->xa)
#define xas_unlock(xas)		xa_unlock((xas)->xa)
#define xas_lock_bh(xas)	xa_lock_bh((xas)->xa)
#define xas_unlock_bh(xas)	xa_unlock_bh((xas)->xa)
#define xas_lock_irq(xas)	xa_lock_irq((xas)->xa)
#define xas_unlock_irq(xas)	xa_unlock_irq((xas)->xa)
#define xas_lock_irqsave(xas, flags) \
				xa_lock_irqsave((xas)->xa, flags)
#define xas_unlock_irqrestore(xas, flags) \
				xa_unlock_irqrestore((xas)->xa, flags)
543
544/**
545 * xas_error() - Return an errno stored in the xa_state.
546 * @xas: XArray operation state.
547 *
548 * Return: 0 if no error has been noted. A negative errno if one has.
549 */
550static inline int xas_error(const struct xa_state *xas)
551{
552 return xa_err(xas->xa_node);
553}
554
555/**
556 * xas_set_err() - Note an error in the xa_state.
557 * @xas: XArray operation state.
558 * @err: Negative error number.
559 *
560 * Only call this function with a negative @err; zero or positive errors
561 * will probably not behave the way you think they should. If you want
562 * to clear the error from an xa_state, use xas_reset().
563 */
564static inline void xas_set_err(struct xa_state *xas, long err)
565{
566 xas->xa_node = XA_ERROR(err);
567}
568
569/**
570 * xas_invalid() - Is the xas in a retry or error state?
571 * @xas: XArray operation state.
572 *
573 * Return: %true if the xas cannot be used for operations.
574 */
575static inline bool xas_invalid(const struct xa_state *xas)
576{
577 return (unsigned long)xas->xa_node & 3;
578}
579
/**
 * xas_valid() - Is the xas a valid cursor into the array?
 * @xas: XArray operation state.
 *
 * This is the logical negation of xas_invalid().
 *
 * Return: %true if the xas can be used for operations.
 */
static inline bool xas_valid(const struct xa_state *xas)
{
	if (xas_invalid(xas))
		return false;
	return true;
}
590
591/**
592 * xas_reset() - Reset an XArray operation state.
593 * @xas: XArray operation state.
594 *
595 * Resets the error or walk state of the @xas so future walks of the
596 * array will start from the root. Use this if you have dropped the
597 * xarray lock and want to reuse the xa_state.
598 *
599 * Context: Any context.
600 */
601static inline void xas_reset(struct xa_state *xas)
602{
603 xas->xa_node = XAS_RESTART;
604}
605
/**
 * xas_retry() - Retry the operation if appropriate.
 * @xas: XArray operation state.
 * @entry: Entry from xarray.
 *
 * The advanced functions may sometimes return an internal entry, such as
 * a retry entry.  If @entry is a retry entry, this function resets the
 * @xas so the walk restarts from the head of the array; any other entry
 * leaves the @xas untouched.
 *
 * Context: Any context.
 * Return: true if the operation needs to be retried.
 */
static inline bool xas_retry(struct xa_state *xas, const void *entry)
{
	bool retry = xa_is_retry(entry);

	if (retry)
		xas_reset(xas);
	return retry;
}
625
/* Implemented out of line; see the XArray implementation file. */
void *xas_load(struct xa_state *xas);
627
628/**
629 * xas_reload() - Refetch an entry from the xarray.
630 * @xas: XArray operation state.
631 *
632 * Use this function to check that a previously loaded entry still has
633 * the same value. This is useful for the lockless pagecache lookup where
634 * we walk the array with only the RCU lock to protect us, lock the page,
635 * then check that the page hasn't moved since we looked it up.
636 *
637 * The caller guarantees that @xas is still valid. If it may be in an
638 * error or restart state, call xas_load() instead.
639 *
640 * Return: The entry at this location in the xarray.
641 */
642static inline void *xas_reload(struct xa_state *xas)
643{
644 struct xa_node *node = xas->xa_node;
645
646 if (node)
647 return xa_entry(xas->xa, node, xas->xa_offset);
648 return xa_head(xas->xa);
649}
650
Matthew Wilcoxf6bb2a22018-04-10 16:36:52 -0700651#endif /* _LINUX_XARRAY_H */