blkcg: revert blkcg cleanups series

This reverts a series committed earlier due to null pointer exception
bug report in [1]. It seems there are edge case interactions that I did
not consider and will need some time to understand what causes the
adverse interactions.

The original series can be found in [2] with a follow up series in [3].

[1] https://www.spinics.net/lists/cgroups/msg20719.html
[2] https://lore.kernel.org/lkml/20180911184137.35897-1-dennisszhou@gmail.com/
[3] https://lore.kernel.org/lkml/20181020185612.51587-1-dennis@kernel.org/

This reverts the following commits:
d459d853c2ed, b2c3fa546705, 101246ec02b5, b3b9f24f5fcc, e2b0989954ae,
f0fcb3ec89f3, c839e7a03f92, bdc2491708c4, 74b7c02a9bc1, 5bf9a1f3b4ef,
a7b39b4e961c, 07b05bcc3213, 49f4c2dc2b50, 27e6fa996c53

Signed-off-by: Dennis Zhou <dennis@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b47c7f7..056fb62 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -503,31 +503,23 @@
 	disk_devt((bio)->bi_disk)
 
 #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
-int bio_associate_blkg_from_page(struct bio *bio, struct page *page);
+int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
 #else
-static inline int bio_associate_blkg_from_page(struct bio *bio,
-					       struct page *page) { return 0; }
+static inline int bio_associate_blkcg_from_page(struct bio *bio,
+						struct page *page) {  return 0; }
 #endif
 
 #ifdef CONFIG_BLK_CGROUP
+int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
 int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
-int bio_associate_blkg_from_css(struct bio *bio,
-				struct cgroup_subsys_state *css);
-int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
-int bio_reassociate_blkg(struct request_queue *q, struct bio *bio);
 void bio_disassociate_task(struct bio *bio);
-void bio_clone_blkg_association(struct bio *dst, struct bio *src);
+void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
 #else	/* CONFIG_BLK_CGROUP */
-static inline int bio_associate_blkg_from_css(struct bio *bio,
-					      struct cgroup_subsys_state *css)
-{ return 0; }
-static inline int bio_associate_create_blkg(struct request_queue *q,
-					    struct bio *bio) { return 0; }
-static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
-{ return 0; }
+static inline int bio_associate_blkcg(struct bio *bio,
+			struct cgroup_subsys_state *blkcg_css) { return 0; }
 static inline void bio_disassociate_task(struct bio *bio) { }
-static inline void bio_clone_blkg_association(struct bio *dst,
-					      struct bio *src) { }
+static inline void bio_clone_blkcg_association(struct bio *dst,
+			struct bio *src) { }
 #endif	/* CONFIG_BLK_CGROUP */
 
 #ifdef CONFIG_HIGHMEM
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1e76cee..6d766a1 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -126,7 +126,7 @@
 	struct request_list		rl;
 
 	/* reference count */
-	struct percpu_ref		refcnt;
+	atomic_t			refcnt;
 
 	/* is this blkg online? protected by both blkcg and q locks */
 	bool				online;
@@ -184,8 +184,6 @@
 
 struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
 				      struct request_queue *q, bool update_hint);
-struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-				      struct request_queue *q);
 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 				    struct request_queue *q);
 int blkcg_init_queue(struct request_queue *q);
@@ -232,59 +230,22 @@
 		   char *input, struct blkg_conf_ctx *ctx);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
-/**
- * blkcg_css - find the current css
- *
- * Find the css associated with either the kthread or the current task.
- * This may return a dying css, so it is up to the caller to use tryget logic
- * to confirm it is alive and well.
- */
-static inline struct cgroup_subsys_state *blkcg_css(void)
-{
-	struct cgroup_subsys_state *css;
-
-	css = kthread_blkcg();
-	if (css)
-		return css;
-	return task_css(current, io_cgrp_id);
-}
 
 static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 {
 	return css ? container_of(css, struct blkcg, css) : NULL;
 }
 
-/**
- * __bio_blkcg - internal version of bio_blkcg for bfq and cfq
- *
- * DO NOT USE.
- * There is a flaw using this version of the function.  In particular, this was
- * used in a broken paradigm where association was called on the given css.  It
- * is possible though that the returned css from task_css() is in the process
- * of dying due to migration of the current task.  So it is improper to assume
- * *_get() is going to succeed.  Both BFQ and CFQ rely on this logic and will
- * take additional work to handle more gracefully.
- */
-static inline struct blkcg *__bio_blkcg(struct bio *bio)
-{
-	if (bio && bio->bi_blkg)
-		return bio->bi_blkg->blkcg;
-	return css_to_blkcg(blkcg_css());
-}
-
-/**
- * bio_blkcg - grab the blkcg associated with a bio
- * @bio: target bio
- *
- * This returns the blkcg associated with a bio, NULL if not associated.
- * Callers are expected to either handle NULL or know association has been
- * done prior to calling this.
- */
 static inline struct blkcg *bio_blkcg(struct bio *bio)
 {
-	if (bio && bio->bi_blkg)
-		return bio->bi_blkg->blkcg;
-	return NULL;
+	struct cgroup_subsys_state *css;
+
+	if (bio && bio->bi_css)
+		return css_to_blkcg(bio->bi_css);
+	css = kthread_blkcg();
+	if (css)
+		return css_to_blkcg(css);
+	return css_to_blkcg(task_css(current, io_cgrp_id));
 }
 
 static inline bool blk_cgroup_congested(void)
@@ -490,35 +451,26 @@
  */
 static inline void blkg_get(struct blkcg_gq *blkg)
 {
-	percpu_ref_get(&blkg->refcnt);
+	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+	atomic_inc(&blkg->refcnt);
 }
 
 /**
- * blkg_tryget - try and get a blkg reference
+ * blkg_try_get - try and get a blkg reference
  * @blkg: blkg to get
  *
  * This is for use when doing an RCU lookup of the blkg.  We may be in the midst
  * of freeing this blkg, so we can only use it if the refcnt is not zero.
  */
-static inline bool blkg_tryget(struct blkcg_gq *blkg)
+static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
 {
-	return percpu_ref_tryget(&blkg->refcnt);
+	if (atomic_inc_not_zero(&blkg->refcnt))
+		return blkg;
+	return NULL;
 }
 
-/**
- * blkg_tryget_closest - try and get a blkg ref on the closet blkg
- * @blkg: blkg to get
- *
- * This walks up the blkg tree to find the closest non-dying blkg and returns
- * the blkg that it did association with as it may not be the passed in blkg.
- */
-static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
-{
-	while (!percpu_ref_tryget(&blkg->refcnt))
-		blkg = blkg->parent;
 
-	return blkg;
-}
+void __blkg_release_rcu(struct rcu_head *rcu);
 
 /**
  * blkg_put - put a blkg reference
@@ -526,7 +478,9 @@
  */
 static inline void blkg_put(struct blkcg_gq *blkg)
 {
-	percpu_ref_put(&blkg->refcnt);
+	WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
+	if (atomic_dec_and_test(&blkg->refcnt))
+		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
 }
 
 /**
@@ -579,36 +533,25 @@
 
 	rcu_read_lock();
 
-	if (bio && bio->bi_blkg) {
-		blkcg = bio->bi_blkg->blkcg;
-		if (blkcg == &blkcg_root)
-			goto rl_use_root;
+	blkcg = bio_blkcg(bio);
 
-		blkg_get(bio->bi_blkg);
-		rcu_read_unlock();
-		return &bio->bi_blkg->rl;
-	}
-
-	blkcg = css_to_blkcg(blkcg_css());
+	/* bypass blkg lookup and use @q->root_rl directly for root */
 	if (blkcg == &blkcg_root)
-		goto rl_use_root;
-
-	blkg = blkg_lookup(blkcg, q);
-	if (unlikely(!blkg))
-		blkg = __blkg_lookup_create(blkcg, q);
-
-	if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg))
-		goto rl_use_root;
-
-	rcu_read_unlock();
-	return &blkg->rl;
+		goto root_rl;
 
 	/*
-	 * Each blkg has its own request_list, however, the root blkcg
-	 * uses the request_queue's root_rl.  This is to avoid most
-	 * overhead for the root blkcg.
+	 * Try to use blkg->rl.  blkg lookup may fail under memory pressure
+	 * or if either the blkcg or queue is going away.  Fall back to
+	 * root_rl in such cases.
 	 */
-rl_use_root:
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg))
+		goto root_rl;
+
+	blkg_get(blkg);
+	rcu_read_unlock();
+	return &blkg->rl;
+root_rl:
 	rcu_read_unlock();
 	return &q->root_rl;
 }
@@ -854,26 +797,32 @@
 				  struct bio *bio) { return false; }
 #endif
 
-
-static inline void blkcg_bio_issue_init(struct bio *bio)
-{
-	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
-}
-
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 					 struct bio *bio)
 {
+	struct blkcg *blkcg;
 	struct blkcg_gq *blkg;
 	bool throtl = false;
 
 	rcu_read_lock();
+	blkcg = bio_blkcg(bio);
 
-	bio_associate_create_blkg(q, bio);
-	blkg = bio->bi_blkg;
+	/* associate blkcg if bio hasn't attached one */
+	bio_associate_blkcg(bio, &blkcg->css);
+
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg)) {
+		spin_lock_irq(q->queue_lock);
+		blkg = blkg_lookup_create(blkcg, q);
+		if (IS_ERR(blkg))
+			blkg = NULL;
+		spin_unlock_irq(q->queue_lock);
+	}
 
 	throtl = blk_throtl_bio(q, blkg, bio);
 
 	if (!throtl) {
+		blkg = blkg ?: q->root_blkg;
 		/*
 		 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
 		 * is a split bio and we would have already accounted for the
@@ -885,8 +834,6 @@
 		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
 	}
 
-	blkcg_bio_issue_init(bio);
-
 	rcu_read_unlock();
 	return !throtl;
 }
@@ -983,7 +930,6 @@
 static inline void blkcg_deactivate_policy(struct request_queue *q,
 					   const struct blkcg_policy *pol) { }
 
-static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
 static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
 
 static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
@@ -999,7 +945,6 @@
 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
 static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
 
-static inline void blkcg_bio_issue_init(struct bio *bio) { }
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 					 struct bio *bio) { return true; }
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 093a818..1dcf652 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -178,6 +178,7 @@
 	 * release.  Read comment on top of bio_associate_current().
 	 */
 	struct io_context	*bi_ioc;
+	struct cgroup_subsys_state *bi_css;
 	struct blkcg_gq		*bi_blkg;
 	struct bio_issue	bi_issue;
 #endif
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b8bcbde..32c5535 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -93,8 +93,6 @@
 
 bool css_has_online_children(struct cgroup_subsys_state *css);
 struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
-struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
-					 struct cgroup_subsys *ss);
 struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
 					     struct cgroup_subsys *ss);
 struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 738a0c2..fdfd04e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -246,8 +246,7 @@
  *
  * @bio is a part of the writeback in progress controlled by @wbc.  Perform
  * writeback specific initialization.  This is used to apply the cgroup
- * writeback context.  Must be called after the bio has been associated with
- * a device.
+ * writeback context.
  */
 static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
 {
@@ -258,7 +257,7 @@
 	 * regular writeback instead of writing things out itself.
 	 */
 	if (wbc->wb)
-		bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css);
+		bio_associate_blkcg(bio, wbc->wb->blkcg_css);
 }
 
 #else	/* CONFIG_CGROUP_WRITEBACK */