From a492f075450f3ba87de36e5ffe92a9d0c7af9723 Mon Sep 17 00:00:00 2001
From: Joe Lawrence
Date: Thu, 28 Aug 2014 08:15:21 -0600
Subject: block,scsi: fixup blk_get_request dead queue scenarios

The blk_get_request function may fail in low-memory conditions or during
device removal (even if __GFP_WAIT is set). To distinguish between these
errors, modify the blk_get_request call stack to return the appropriate
ERR_PTR. Verify that all callers check the return status and consider
IS_ERR instead of a simple NULL pointer check.

For consistency, make a similar change to the blk_mq_alloc_request leg
of blk_get_request. It may fail if the queue is dead, or the caller was
unwilling to wait.

Signed-off-by: Joe Lawrence
Acked-by: Jiri Kosina [for pktdvd]
Acked-by: Boaz Harrosh [for osd]
Reviewed-by: Jeff Moyer
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index c359d72e9d76..93603e6ff479 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -933,9 +933,9 @@ static struct io_context *rq_ioc(struct bio *bio)
  * Get a free request from @q.  This function may fail under memory
  * pressure or if @q is dead.
  *
- * Must be callled with @q->queue_lock held and,
- * Returns %NULL on failure, with @q->queue_lock held.
- * Returns !%NULL on success, with @q->queue_lock *not held*.
+ * Must be called with @q->queue_lock held and,
+ * Returns ERR_PTR on failure, with @q->queue_lock held.
+ * Returns request pointer on success, with @q->queue_lock *not held*.
  */
 static struct request *__get_request(struct request_list *rl, int rw_flags,
                                      struct bio *bio, gfp_t gfp_mask)
@@ -949,7 +949,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
         int may_queue;
 
         if (unlikely(blk_queue_dying(q)))
-                return NULL;
+                return ERR_PTR(-ENODEV);
 
         may_queue = elv_may_queue(q, rw_flags);
         if (may_queue == ELV_MQUEUE_NO)
@@ -974,7 +974,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
                                          * process is not a "batcher", and not
                                          * exempted by the IO scheduler
                                          */
-                                        return NULL;
+                                        return ERR_PTR(-ENOMEM);
                                 }
                         }
                 }
@@ -992,7 +992,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
          * allocated with any setting of ->nr_requests
          */
         if (rl->count[is_sync] >= (3 * q->nr_requests / 2))
-                return NULL;
+                return ERR_PTR(-ENOMEM);
 
         q->nr_rqs[is_sync]++;
         rl->count[is_sync]++;
@@ -1097,7 +1097,7 @@ fail_alloc:
 rq_starved:
         if (unlikely(rl->count[is_sync] == 0))
                 rl->starved[is_sync] = 1;
-        return NULL;
+        return ERR_PTR(-ENOMEM);
 }
 
 /**
@@ -1110,9 +1110,9 @@ rq_starved:
  * Get a free request from @q.  If %__GFP_WAIT is set in @gfp_mask, this
  * function keeps retrying under memory pressure and fails iff @q is dead.
  *
- * Must be callled with @q->queue_lock held and,
- * Returns %NULL on failure, with @q->queue_lock held.
- * Returns !%NULL on success, with @q->queue_lock *not held*.
+ * Must be called with @q->queue_lock held and,
+ * Returns ERR_PTR on failure, with @q->queue_lock held.
+ * Returns request pointer on success, with @q->queue_lock *not held*.
  */
 static struct request *get_request(struct request_queue *q, int rw_flags,
                                    struct bio *bio, gfp_t gfp_mask)
@@ -1125,12 +1125,12 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
         rl = blk_get_rl(q, bio);        /* transferred to @rq on success */
 retry:
         rq = __get_request(rl, rw_flags, bio, gfp_mask);
-        if (rq)
+        if (!IS_ERR(rq))
                 return rq;
 
         if (!(gfp_mask & __GFP_WAIT) || unlikely(blk_queue_dying(q))) {
                 blk_put_rl(rl);
-                return NULL;
+                return rq;
         }
 
         /* wait on @rl and retry */
@@ -1167,7 +1167,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 
         spin_lock_irq(q->queue_lock);
         rq = get_request(q, rw, NULL, gfp_mask);
-        if (!rq)
+        if (IS_ERR(rq))
                 spin_unlock_irq(q->queue_lock);
         /* q->queue_lock is unlocked at this point */
 
@@ -1219,8 +1219,8 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio,
 {
         struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask);
 
-        if (unlikely(!rq))
-                return ERR_PTR(-ENOMEM);
+        if (IS_ERR(rq))
+                return rq;
 
         blk_rq_set_block_pc(rq);
 
@@ -1615,8 +1615,8 @@ get_rq:
          * Returns with the queue unlocked.
          */
         req = get_request(q, rw_flags, bio, GFP_NOIO);
-        if (unlikely(!req)) {
-                bio_endio(bio, -ENODEV);        /* @q is dead */
+        if (IS_ERR(req)) {
+                bio_endio(bio, PTR_ERR(req));   /* @q is dead */
                 goto out_unlock;
         }
 
-- cgit v1.2.3
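The caller-side change this patch asks for is mechanical: the NULL test becomes
an IS_ERR() test, and the encoded errno can be propagated with PTR_ERR().
A minimal sketch of an updated caller, assuming the 3.17-era three-argument
blk_get_request(); the function submit_pc_request() and its body are
illustrative only, not part of the patch:

    #include <linux/blkdev.h>
    #include <linux/err.h>

    /* Illustrative caller: shows only the new error convention. */
    static int submit_pc_request(struct request_queue *q, gfp_t gfp_mask)
    {
            struct request *rq;

            rq = blk_get_request(q, WRITE, gfp_mask);
            if (IS_ERR(rq))                 /* was: if (!rq) return -ENOMEM; */
                    return PTR_ERR(rq);     /* -ENODEV for a dying queue, -ENOMEM otherwise */

            blk_rq_set_block_pc(rq);
            /* ... set up cmd[], data and sense buffers, then execute the request ... */
            blk_put_request(rq);
            return 0;
    }

The blk_make_request() and submit-path hunks above follow the same shape: the
error pointer is passed through instead of being collapsed to a bare -ENOMEM.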
From ff9ea323816dc1c8ac7144afd4eab3ac97704430 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Mon, 8 Sep 2014 08:03:56 +0900
Subject: block, bdi: an active gendisk always has a request_queue associated with it

bdev_get_queue() returns the request_queue associated with the
specified block_device.  blk_get_backing_dev_info() makes use of
bdev_get_queue() to determine the associated bdi given a block_device.

All the callers of bdev_get_queue() including blk_get_backing_dev_info()
assume that bdev_get_queue() may return NULL and implement NULL handling;
however, bdev_get_queue() requires the passed in block_device is opened
and attached to its gendisk.  Because an active gendisk always has a
valid request_queue associated with it, bdev_get_queue() can never
return NULL and neither can blk_get_backing_dev_info().

Make it clear that neither of the two functions can return NULL and
remove NULL handling from all the callers.

Signed-off-by: Tejun Heo
Cc: Chris Mason
Cc: Dave Chinner
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index 93603e6ff479..817446175489 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -83,18 +83,14 @@ void blk_queue_congestion_threshold(struct request_queue *q)
  * @bdev: device
  *
  * Locates the passed device's request queue and returns the address of its
- * backing_dev_info
- *
- * Will return NULL if the request queue cannot be located.
+ * backing_dev_info.  This function can only be called if @bdev is opened
+ * and the return value is never NULL.
  */
 struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
 {
-        struct backing_dev_info *ret = NULL;
         struct request_queue *q = bdev_get_queue(bdev);
 
-        if (q)
-                ret = &q->backing_dev_info;
-        return ret;
+        return &q->backing_dev_info;
 }
 EXPORT_SYMBOL(blk_get_backing_dev_info);
-- cgit v1.2.3
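For callers, the guarantee means the defensive NULL checks can simply be
dropped. A hedged sketch of the simplified pattern; the wrapper function below
is hypothetical, only blk_get_backing_dev_info() and bdi_write_congested() are
existing interfaces, and @bdev must already be open as the new comment requires:

    #include <linux/backing-dev.h>
    #include <linux/blkdev.h>

    /* Hypothetical helper: @bdev must be open and attached to its gendisk. */
    static bool bdev_write_congested(struct block_device *bdev)
    {
            struct backing_dev_info *bdi = blk_get_backing_dev_info(bdev);

            /* No NULL check: an active gendisk always has a request_queue. */
            return bdi_write_congested(bdi);
    }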
From f355265571440a7db16e784b6edf4e7d26971a03 Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Thu, 25 Sep 2014 23:23:40 +0800
Subject: block: introduce blk_init_flush and its pair

These two temporary functions are introduced for holding flush
initialization and de-initialization, so that we can introduce the
'flush queue' more easily in the following patch. And once the 'flush
queue' and its allocation/free functions are ready, they will be
removed for the sake of code readability.

Reviewed-by: Christoph Hellwig
Signed-off-by: Ming Lei
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index 6946a4275e6f..0a9d17269957 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -705,8 +705,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
         if (!q)
                 return NULL;
 
-        q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
-        if (!q->flush_rq)
+        if (blk_init_flush(q))
                 return NULL;
 
         if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
@@ -742,7 +741,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
         return q;
 
 fail:
-        kfree(q->flush_rq);
+        blk_exit_flush(q);
         return NULL;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
-- cgit v1.2.3

From 3c09676c12b1dabf84acbb5849bfc54acadaf092 Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Thu, 25 Sep 2014 23:23:41 +0800
Subject: block: move flush initialization to blk_flush_init

These fields are always used with the flush request, so initialize
them together.

Reviewed-by: Christoph Hellwig
Signed-off-by: Ming Lei
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index 0a9d17269957..222fe84d6ac4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -600,9 +600,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 #ifdef CONFIG_BLK_CGROUP
         INIT_LIST_HEAD(&q->blkg_list);
 #endif
-        INIT_LIST_HEAD(&q->flush_queue[0]);
-        INIT_LIST_HEAD(&q->flush_queue[1]);
-        INIT_LIST_HEAD(&q->flush_data_in_flight);
         INIT_DELAYED_WORK(&q->delay_work, blk_delay_work);
 
         kobject_init(&q->kobj, &blk_queue_ktype);
-- cgit v1.2.3

From 7c94e1c157a227837b04f02f5edeff8301410ba2 Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Thu, 25 Sep 2014 23:23:43 +0800
Subject: block: introduce blk_flush_queue to drive flush machinery

This patch introduces 'struct blk_flush_queue' and puts all flush
machinery related fields into this structure, so that

 - flush implementation details aren't exposed to drivers
 - it is easy to convert to per dispatch-queue flush machinery

This patch is basically a mechanical replacement.

Reviewed-by: Christoph Hellwig
Signed-off-by: Ming Lei
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index 222fe84d6ac4..cfaca8ca6cc4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -390,11 +390,13 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
                  * be drained.  Check all the queues and counters.
                  */
                 if (drain_all) {
+                        struct blk_flush_queue *fq = blk_get_flush_queue(q);
                         drain |= !list_empty(&q->queue_head);
                         for (i = 0; i < 2; i++) {
                                 drain |= q->nr_rqs[i];
                                 drain |= q->in_flight[i];
-                                drain |= !list_empty(&q->flush_queue[i]);
+                                if (fq)
+                                        drain |= !list_empty(&fq->flush_queue[i]);
                         }
                 }
-- cgit v1.2.3
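Taken together, the three patches above gather the flush state that used to
live directly in struct request_queue into one object. A simplified sketch of
the shape of that object and of the lookup helper, based only on the fields
visible in these hunks; the real definition is block-layer internal (not shown
in this blk-core.c-only view) and carries extra bookkeeping such as
pending/running indexes, and the queue is assumed to hold a
struct blk_flush_queue *fq pointer, which the next patch's hunk indeed shows
being assigned:

    #include <linux/blkdev.h>

    /* Simplified sketch -- the upstream definition has more members. */
    struct blk_flush_queue {
            struct list_head        flush_queue[2];         /* pending and running flushes */
            struct list_head        flush_data_in_flight;
            struct request          *flush_rq;              /* preallocated flush request */
    };

    /* Assumed accessor: one flush queue per request_queue at this point
     * in the series. */
    static inline struct blk_flush_queue *blk_get_flush_queue(struct request_queue *q)
    {
            return q->fq;
    }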
From ba483388e3058b3e412632a84e6bf1f134beaf3d Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Thu, 25 Sep 2014 23:23:44 +0800
Subject: block: remove blk_init_flush() and its pair

Now the mission of the two helpers is over, so just call
blk_alloc_flush_queue() and blk_free_flush_queue() directly.

Signed-off-by: Ming Lei
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index cfaca8ca6cc4..dba0a8350807 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -704,7 +704,8 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
         if (!q)
                 return NULL;
 
-        if (blk_init_flush(q))
+        q->fq = blk_alloc_flush_queue(q);
+        if (!q->fq)
                 return NULL;
 
         if (blk_init_rl(&q->root_rl, q, GFP_KERNEL))
@@ -740,7 +741,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
         return q;
 
 fail:
-        blk_exit_flush(q);
+        blk_free_flush_queue(q->fq);
         return NULL;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
-- cgit v1.2.3

From e97c293cdf77263abdc021de280516e0017afc84 Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Thu, 25 Sep 2014 23:23:46 +0800
Subject: block: introduce 'blk_mq_ctx' parameter to blk_get_flush_queue

This patch adds a 'blk_mq_ctx' parameter to blk_get_flush_queue(), so
that the function can find the corresponding blk_flush_queue bound to
the current mq context, since the flush queue will become per hw-queue.

For the legacy queue, the parameter can simply be 'NULL'.

For the multiqueue case, the parameter should be set to the context
from which the related request originated. With this context info, the
hw queue and its related flush queue can be found easily.

Signed-off-by: Ming Lei
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index dba0a8350807..b1dd4e086740 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -390,7 +390,7 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
                  * be drained.  Check all the queues and counters.
                  */
                 if (drain_all) {
-                        struct blk_flush_queue *fq = blk_get_flush_queue(q);
+                        struct blk_flush_queue *fq = blk_get_flush_queue(q, NULL);
                         drain |= !list_empty(&q->queue_head);
                         for (i = 0; i < 2; i++) {
                                 drain |= q->nr_rqs[i];
-- cgit v1.2.3
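The new parameter lets one helper serve both submission paths; the commit
message's rule (NULL for the legacy queue, the originating context for blk-mq)
can be made concrete with a small, purely illustrative wrapper.
blk_get_flush_queue() itself is block-layer internal, and the helper below is
hypothetical:

    /* Illustrative only: pick the flush queue that owns @rq. */
    static struct blk_flush_queue *flush_queue_for(struct request_queue *q,
                                                   struct request *rq)
    {
            if (!q->mq_ops)                         /* legacy request_fn path */
                    return blk_get_flush_queue(q, NULL);
            /* blk-mq: the sw context identifies the hw queue (and, after the
             * next patch, that hw queue's private flush machinery). */
            return blk_get_flush_queue(q, rq->mq_ctx);
    }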
From f70ced09170761acb69840cafaace4abc72cba4b Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Thu, 25 Sep 2014 23:23:47 +0800
Subject: blk-mq: support per-distpatch_queue flush machinery

This patch supports running a single flush machinery for each blk-mq
dispatch queue, so that:

 - the current init_request and exit_request callbacks can cover flush
   requests too, which fixes the buggy copying way of initializing the
   flush request's pdu

 - flushing performance gets improved in case of multiple hw queues

In an fio sync write test over virtio-blk (4 hw queues, ioengine=sync,
iodepth=64, numjobs=4, bs=4K), throughput increases significantly in my
test environment:

 - throughput: +70% in case of virtio-blk over null_blk
 - throughput: +30% in case of virtio-blk over an SSD image

The multi virtqueue feature isn't merged into QEMU yet; patches for the
feature can be found in the tree below:

	git://kernel.ubuntu.com/ming/qemu.git  v2.1.0-mq.4

Simply passing 'num_queues=4 vectors=5' should be enough to enable the
multi queue (quad queue) feature for QEMU virtio-blk.

Suggested-by: Christoph Hellwig
Signed-off-by: Ming Lei
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index b1dd4e086740..e1c2775c7597 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -704,7 +704,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
         if (!q)
                 return NULL;
 
-        q->fq = blk_alloc_flush_queue(q);
+        q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, 0);
         if (!q->fq)
                 return NULL;
 
-- cgit v1.2.3

From 4a0efdc933680d908de11712a774a2c9492c3d5a Mon Sep 17 00:00:00 2001
From: Hannes Reinecke
Date: Wed, 1 Oct 2014 14:32:31 +0200
Subject: block: misplaced rq_complete tracepoint

The rq_complete tracepoint was never issued for empty requests, causing
the resulting blktrace information to never show any completion for
those requests.

Signed-off-by: Hannes Reinecke
Acked-by: Tejun Heo
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index e1c2775c7597..4aa9ccd02a50 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2400,11 +2400,11 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 {
         int total_bytes;
 
+        trace_block_rq_complete(req->q, req, nr_bytes);
+
         if (!req->bio)
                 return false;
 
-        trace_block_rq_complete(req->q, req, nr_bytes);
-
         /*
          * For fs requests, rq is just carrier of independent bio's
          * and each partial completion should be handled separately.
-- cgit v1.2.3

From 11dfce509eaa35e8fc81cb50d0910c0e235fd7e2 Mon Sep 17 00:00:00 2001
From: Junichi Nomura
Date: Fri, 3 Oct 2014 17:27:11 -0400
Subject: block: use bio_clone_fast() in blk_rq_prep_clone()

Request cloning clones bios in the request to track the completion of
each bio. For that purpose, we can use bio_clone_fast() instead of
bio_clone() to avoid unnecessary allocation and copying of bvecs.

This patch reduces the memory footprint of request-based device-mapper
(about 1-4KB for each request) and is a preparation for further
reduction of memory usage by removing the unused bvec mempool.

Signed-off-by: Jun'ichi Nomura
Signed-off-by: Mike Snitzer
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index 4aa9ccd02a50..ffcb47af35f3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2926,7 +2926,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
         blk_rq_init(NULL, rq);
 
         __rq_for_each_bio(bio_src, rq_src) {
-                bio = bio_clone_bioset(bio_src, gfp_mask, bs);
+                bio = bio_clone_fast(bio_src, gfp_mask, bs);
                 if (!bio)
                         goto free_and_out;
 
-- cgit v1.2.3
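The bio_clone_fast() switch is worth a note for other request-cloning code:
the fast clone shares the source bio's bvec table instead of copying it, which
is exactly what completion tracking needs, but it means the clone must not
modify the I/O vector and must not outlive the source bio. A hedged usage
sketch; the bio_set and function names below are illustrative:

    #include <linux/bio.h>

    static struct bio_set *clone_bs;        /* e.g. created with bioset_create() at init time */

    /* Illustrative: clone only to observe completion, never to touch the data. */
    static struct bio *clone_for_tracking(struct bio *bio_src, gfp_t gfp_mask)
    {
            struct bio *clone = bio_clone_fast(bio_src, gfp_mask, clone_bs);

            if (!clone)
                    return NULL;            /* allocation from clone_bs failed */
            /* clone->bi_io_vec is shared with bio_src -- read-only by contract. */
            return clone;
    }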
From ef3ecb66bcd6b2076dc8782e1315cf2807b73c0c Mon Sep 17 00:00:00 2001
From: Robert Elliott
Date: Wed, 27 Aug 2014 10:50:31 -0500
Subject: block: make blk_update_request print prefix match ratelimited prefix

In blk_update_request, change the printk_ratelimited prefix from
end_request to blk_update_request so it matches the name printed if
rate limiting occurs.

Old:
[10234.933106] blk_update_request: 174 callbacks suppressed
[10234.934940] end_request: critical target error, dev sdr, sector 16
[10234.949788] end_request: critical target error, dev sdr, sector 16

New:
[16863.445173] blk_update_request: 398 callbacks suppressed
[16863.447029] blk_update_request: critical target error, dev sdr, sector 1442066176
[16863.449383] blk_update_request: critical target error, dev sdr, sector 802802888
[16863.451680] blk_update_request: critical target error, dev sdr, sector 1609535456

Signed-off-by: Robert Elliott
Reviewed-by: Webb Scales
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index ffcb47af35f3..ecc124ec53bb 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2444,8 +2444,8 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
                         error_type = "I/O";
                         break;
                 }
-                printk_ratelimited(KERN_ERR "end_request: %s error, dev %s, sector %llu\n",
-                                   error_type, req->rq_disk ?
+                printk_ratelimited(KERN_ERR "%s: %s error, dev %s, sector %llu\n",
+                                   __func__, error_type, req->rq_disk ?
                                    req->rq_disk->disk_name : "?",
                                    (unsigned long long)blk_rq_pos(req));
-- cgit v1.2.3

From 7b2b10e0e2c65ebc11314e1af9924d0824ec1562 Mon Sep 17 00:00:00 2001
From: Robert Elliott
Date: Wed, 27 Aug 2014 10:50:36 -0500
Subject: block: include func name in __get_request prints

In __get_request calls to printk_ratelimited, include the function name
so the callbacks suppressed message matches the messages that are
printed, and add "dev" before the device name so it matches other block
layer messages.

Signed-off-by: Robert Elliott
Reviewed-by: Webb Scales
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index ecc124ec53bb..d6ec7db9a9f4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1060,8 +1060,8 @@ fail_elvpriv:
          * shouldn't stall IO.  Treat this request as !elvpriv.  This will
          * disturb iosched and blkcg but weird is bettern than dead.
          */
-        printk_ratelimited(KERN_WARNING "%s: request aux data allocation failed, iosched may be disturbed\n",
-                           dev_name(q->backing_dev_info.dev));
+        printk_ratelimited(KERN_WARNING "%s: dev %s: request aux data allocation failed, iosched may be disturbed\n",
+                           __func__, dev_name(q->backing_dev_info.dev));
 
         rq->cmd_flags &= ~REQ_ELVPRIV;
         rq->elv.icq = NULL;
-- cgit v1.2.3
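Both printk patches follow the same rule of thumb: when printk_ratelimited()
suppresses output, it prefixes its "N callbacks suppressed" line with the
calling function's name, so putting __func__ (and a "dev" tag before the device
name) into the format keeps the suppressed-line prefix and the real messages
correlatable in the log. A minimal sketch of the pattern; the helper and the
message text below are illustrative:

    #include <linux/blkdev.h>
    #include <linux/printk.h>

    /* Illustrative: the rate-limit "callbacks suppressed" line carries this
     * function's name, and so does every message printed here. */
    static void report_medium_error(const char *disk_name, sector_t sector)
    {
            printk_ratelimited(KERN_ERR "%s: critical medium error, dev %s, sector %llu\n",
                               __func__, disk_name, (unsigned long long)sector);
    }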