summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-01-29 11:51:49 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-01-29 11:51:49 -0800
commit0a4b6e2f80aad46fb55a5cf7b1664c0aef030ee0 (patch)
treecefccd67dc1f27bb45830f6b8065dd4a1c05e83b /include/linux
parent9697e9da84299d0d715d515dd2cc48f1eceb277d (diff)
parent796baeeef85a40b3495a907fb7425086e7010102 (diff)
Merge branch 'for-4.16/block' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe: "This is the main pull request for block IO related changes for the 4.16 kernel. Nothing major in this pull request, but a good amount of improvements and fixes all over the map. This contains: - BFQ improvements, fixes, and cleanups from Angelo, Chiara, and Paolo. - Support for SMR zones for deadline and mq-deadline from Damien and Christoph. - Set of fixes for bcache by way of Michael Lyle, including fixes from himself, Kent, Rui, Tang, and Coly. - Series from Matias for lightnvm with fixes from Hans Holmberg, Javier, and Matias. Mostly centered around pblk, and the removal of rrpc 1.2 in preparation for supporting 2.0. - A couple of NVMe pull requests from Christoph. Nothing major in here, just fixes and cleanups, and support for command tracing from Johannes. - Support for blk-throttle for tracking reads and writes separately. From Joseph Qi. A few cleanups/fixes also for blk-throttle from Weiping. - Series from Mike Snitzer that enables dm to register its queue more logically, something that's always been problematic on dm since it's a stacked device. - Series from Ming cleaning up some of the bio accessor use, in preparation for supporting multipage bvecs. - Various fixes from Ming closing up holes around queue mapping and quiescing. - BSD partition fix from Richard Narron, fixing a problem where we can't mount newer (10/11) FreeBSD partitions. - Series from Tejun reworking blk-mq timeout handling. The previous scheme relied on atomic bits, but it had races where we would think a request had timed out if it got reused at the wrong time. - null_blk now supports faking timeouts, to enable us to better exercise and test that functionality separately. From me. - Kill the separate atomic poll bit in the request struct. After this, we don't use the atomic bits on blk-mq anymore at all. From me. - sgl_alloc/free helpers from Bart. - Heavily contended tag case scalability improvement from me. 
- Various little fixes and cleanups from Arnd, Bart, Corentin, Douglas, Eryu, Goldwyn, and myself" * 'for-4.16/block' of git://git.kernel.dk/linux-block: (186 commits) block: remove smart1,2.h nvme: add tracepoint for nvme_complete_rq nvme: add tracepoint for nvme_setup_cmd nvme-pci: introduce RECONNECTING state to mark initializing procedure nvme-rdma: remove redundant boolean for inline_data nvme: don't free uuid pointer before printing it nvme-pci: Suspend queues after deleting them bsg: use pr_debug instead of hand crafted macros blk-mq-debugfs: don't allow write on attributes with seq_operations set nvme-pci: Fix queue double allocations block: Set BIO_TRACE_COMPLETION on new bio during split blk-throttle: use queue_is_rq_based block: Remove kblockd_schedule_delayed_work{,_on}() blk-mq: Avoid that blk_mq_delay_run_hw_queue() introduces unintended delays blk-mq: Rename blk_mq_request_direct_issue() into blk_mq_request_issue_directly() lib/scatterlist: Fix chaining support in sgl_alloc_order() blk-throttle: track read and write request individually block: add bdev_read_only() checks to common helpers block: fail op_is_write() requests to read-only partitions blk-throttle: export io_serviced_recursive, io_service_bytes_recursive ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/bio.h24
-rw-r--r--include/linux/blk-cgroup.h8
-rw-r--r--include/linux/blk-mq.h3
-rw-r--r--include/linux/blk_types.h28
-rw-r--r--include/linux/blkdev.h172
-rw-r--r--include/linux/bvec.h9
-rw-r--r--include/linux/elevator.h2
-rw-r--r--include/linux/genhd.h5
-rw-r--r--include/linux/lightnvm.h125
-rw-r--r--include/linux/nvme.h22
-rw-r--r--include/linux/scatterlist.h11
11 files changed, 290 insertions, 119 deletions
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 23d29b39f71e..d0eb659fa733 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -300,6 +300,29 @@ static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
bv->bv_len = iter.bi_bvec_done;
}
+static inline unsigned bio_pages_all(struct bio *bio)
+{
+ WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+ return bio->bi_vcnt;
+}
+
+static inline struct bio_vec *bio_first_bvec_all(struct bio *bio)
+{
+ WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+ return bio->bi_io_vec;
+}
+
+static inline struct page *bio_first_page_all(struct bio *bio)
+{
+ return bio_first_bvec_all(bio)->bv_page;
+}
+
+static inline struct bio_vec *bio_last_bvec_all(struct bio *bio)
+{
+ WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED));
+ return &bio->bi_io_vec[bio->bi_vcnt - 1];
+}
+
enum bip_flags {
BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */
BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */
@@ -477,7 +500,6 @@ static inline void bio_flush_dcache_pages(struct bio *bi)
#endif
extern void bio_copy_data(struct bio *dst, struct bio *src);
-extern int bio_alloc_pages(struct bio *bio, gfp_t gfp);
extern void bio_free_pages(struct bio *bio);
extern struct bio *bio_copy_user_iov(struct request_queue *,
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index e9825ff57b15..69bea82ebeb1 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -660,12 +660,14 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
struct blkg_rwstat *from)
{
- struct blkg_rwstat v = blkg_rwstat_read(from);
+ u64 sum[BLKG_RWSTAT_NR];
int i;
for (i = 0; i < BLKG_RWSTAT_NR; i++)
- atomic64_add(atomic64_read(&v.aux_cnt[i]) +
- atomic64_read(&from->aux_cnt[i]),
+ sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);
+
+ for (i = 0; i < BLKG_RWSTAT_NR; i++)
+ atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
&to->aux_cnt[i]);
}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 95c9a5c862e2..8efcf49796a3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -51,6 +51,7 @@ struct blk_mq_hw_ctx {
unsigned int queue_num;
atomic_t nr_active;
+ unsigned int nr_expired;
struct hlist_node cpuhp_dead;
struct kobject kobj;
@@ -65,7 +66,7 @@ struct blk_mq_hw_ctx {
#endif
/* Must be the last member - see also blk_mq_hw_ctx_size(). */
- struct srcu_struct queue_rq_srcu[0];
+ struct srcu_struct srcu[0];
};
struct blk_mq_tag_set {
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 9e7d8bd776d2..c5d3db0d83f8 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -39,6 +39,34 @@ typedef u8 __bitwise blk_status_t;
#define BLK_STS_AGAIN ((__force blk_status_t)12)
+/**
+ * blk_path_error - returns true if error may be path related
+ * @error: status the request was completed with
+ *
+ * Description:
+ * This classifies block error status into non-retryable errors and ones
+ * that may be successful if retried on a failover path.
+ *
+ * Return:
+ * %false - retrying failover path will not help
+ * %true - may succeed if retried
+ */
+static inline bool blk_path_error(blk_status_t error)
+{
+ switch (error) {
+ case BLK_STS_NOTSUPP:
+ case BLK_STS_NOSPC:
+ case BLK_STS_TARGET:
+ case BLK_STS_NEXUS:
+ case BLK_STS_MEDIUM:
+ case BLK_STS_PROTECTION:
+ return false;
+ }
+
+ /* Anything else could be a path failure, so should be retried */
+ return true;
+}
+
struct blk_issue_stat {
u64 stat;
};
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0ce8a372d506..4f3df807cf8f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -27,6 +27,8 @@
#include <linux/percpu-refcount.h>
#include <linux/scatterlist.h>
#include <linux/blkzoned.h>
+#include <linux/seqlock.h>
+#include <linux/u64_stats_sync.h>
struct module;
struct scsi_ioctl_command;
@@ -121,6 +123,12 @@ typedef __u32 __bitwise req_flags_t;
/* Look at ->special_vec for the actual data payload instead of the
bio chain. */
#define RQF_SPECIAL_PAYLOAD ((__force req_flags_t)(1 << 18))
+/* The per-zone write lock is held for this request */
+#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
+/* timeout is expired */
+#define RQF_MQ_TIMEOUT_EXPIRED ((__force req_flags_t)(1 << 20))
+/* already slept for hybrid poll */
+#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 21))
/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
@@ -133,12 +141,6 @@ typedef __u32 __bitwise req_flags_t;
* especially blk_mq_rq_ctx_init() to take care of the added fields.
*/
struct request {
- struct list_head queuelist;
- union {
- struct __call_single_data csd;
- u64 fifo_time;
- };
-
struct request_queue *q;
struct blk_mq_ctx *mq_ctx;
@@ -148,8 +150,6 @@ struct request {
int internal_tag;
- unsigned long atomic_flags;
-
/* the following two fields are internal, NEVER access directly */
unsigned int __data_len; /* total data len */
int tag;
@@ -158,6 +158,8 @@ struct request {
struct bio *bio;
struct bio *biotail;
+ struct list_head queuelist;
+
/*
* The hash is used inside the scheduler, and killed once the
* request reaches the dispatch list. The ipi_list is only used
@@ -205,19 +207,16 @@ struct request {
struct hd_struct *part;
unsigned long start_time;
struct blk_issue_stat issue_stat;
-#ifdef CONFIG_BLK_CGROUP
- struct request_list *rl; /* rl this rq is alloced from */
- unsigned long long start_time_ns;
- unsigned long long io_start_time_ns; /* when passed to hardware */
-#endif
/* Number of scatter-gather DMA addr+len pairs after
* physical address coalescing is performed.
*/
unsigned short nr_phys_segments;
+
#if defined(CONFIG_BLK_DEV_INTEGRITY)
unsigned short nr_integrity_segments;
#endif
+ unsigned short write_hint;
unsigned short ioprio;
unsigned int timeout;
@@ -226,11 +225,37 @@ struct request {
unsigned int extra_len; /* length of alignment and padding */
- unsigned short write_hint;
+ /*
+ * On blk-mq, the lower bits of ->gstate (generation number and
+ * state) carry the MQ_RQ_* state value and the upper bits the
+ * generation number which is monotonically incremented and used to
+ * distinguish the reuse instances.
+ *
+ * ->gstate_seq allows updates to ->gstate and other fields
+ * (currently ->deadline) during request start to be read
+ * atomically from the timeout path, so that it can operate on a
+ * coherent set of information.
+ */
+ seqcount_t gstate_seq;
+ u64 gstate;
+
+ /*
+ * ->aborted_gstate is used by the timeout to claim a specific
+ * recycle instance of this request. See blk_mq_timeout_work().
+ */
+ struct u64_stats_sync aborted_gstate_sync;
+ u64 aborted_gstate;
+
+ /* access through blk_rq_set_deadline, blk_rq_deadline */
+ unsigned long __deadline;
- unsigned long deadline;
struct list_head timeout_list;
+ union {
+ struct __call_single_data csd;
+ u64 fifo_time;
+ };
+
/*
* completion callback.
*/
@@ -239,6 +264,12 @@ struct request {
/* for bidi */
struct request *next_rq;
+
+#ifdef CONFIG_BLK_CGROUP
+ struct request_list *rl; /* rl this rq is alloced from */
+ unsigned long long start_time_ns;
+ unsigned long long io_start_time_ns; /* when passed to hardware */
+#endif
};
static inline bool blk_op_is_scsi(unsigned int op)
@@ -564,6 +595,22 @@ struct request_queue {
struct queue_limits limits;
/*
+ * Zoned block device information for request dispatch control.
+ * nr_zones is the total number of zones of the device. This is always
+ * 0 for regular block devices. seq_zones_bitmap is a bitmap of nr_zones
+ * bits which indicates if a zone is conventional (bit clear) or
+ * sequential (bit set). seq_zones_wlock is a bitmap of nr_zones
+ * bits which indicates if a zone is write locked, that is, if a write
+ * request targeting the zone was dispatched. All three fields are
+ * initialized by the low level device driver (e.g. scsi/sd.c).
+ * Stacking drivers (device mappers) may or may not initialize
+ * these fields.
+ */
+ unsigned int nr_zones;
+ unsigned long *seq_zones_bitmap;
+ unsigned long *seq_zones_wlock;
+
+ /*
* sg stuff
*/
unsigned int sg_timeout;
@@ -807,6 +854,27 @@ static inline unsigned int blk_queue_zone_sectors(struct request_queue *q)
return blk_queue_is_zoned(q) ? q->limits.chunk_sectors : 0;
}
+static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+{
+ return q->nr_zones;
+}
+
+static inline unsigned int blk_queue_zone_no(struct request_queue *q,
+ sector_t sector)
+{
+ if (!blk_queue_is_zoned(q))
+ return 0;
+ return sector >> ilog2(q->limits.chunk_sectors);
+}
+
+static inline bool blk_queue_zone_is_seq(struct request_queue *q,
+ sector_t sector)
+{
+ if (!blk_queue_is_zoned(q) || !q->seq_zones_bitmap)
+ return false;
+ return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
+}
+
static inline bool rq_is_sync(struct request *rq)
{
return op_is_sync(rq->cmd_flags);
@@ -1046,6 +1114,16 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
return blk_rq_cur_bytes(rq) >> 9;
}
+static inline unsigned int blk_rq_zone_no(struct request *rq)
+{
+ return blk_queue_zone_no(rq->q, blk_rq_pos(rq));
+}
+
+static inline unsigned int blk_rq_zone_is_seq(struct request *rq)
+{
+ return blk_queue_zone_is_seq(rq->q, blk_rq_pos(rq));
+}
+
/*
* Some commands like WRITE SAME have a payload or data transfer size which
* is different from the size of the request. Any driver that supports such
@@ -1595,7 +1673,15 @@ static inline unsigned int bdev_zone_sectors(struct block_device *bdev)
if (q)
return blk_queue_zone_sectors(q);
+ return 0;
+}
+
+static inline unsigned int bdev_nr_zones(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ if (q)
+ return blk_queue_nr_zones(q);
return 0;
}
@@ -1731,8 +1817,6 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
int kblockd_schedule_work(struct work_struct *work);
int kblockd_schedule_work_on(int cpu, struct work_struct *work);
-int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
-int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
#ifdef CONFIG_BLK_CGROUP
@@ -1971,6 +2055,60 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
extern int bdev_read_page(struct block_device *, sector_t, struct page *);
extern int bdev_write_page(struct block_device *, sector_t, struct page *,
struct writeback_control *);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+bool blk_req_needs_zone_write_lock(struct request *rq);
+void __blk_req_zone_write_lock(struct request *rq);
+void __blk_req_zone_write_unlock(struct request *rq);
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+ if (blk_req_needs_zone_write_lock(rq))
+ __blk_req_zone_write_lock(rq);
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+ if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
+ __blk_req_zone_write_unlock(rq);
+}
+
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+ return rq->q->seq_zones_wlock &&
+ test_bit(blk_rq_zone_no(rq), rq->q->seq_zones_wlock);
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+ if (!blk_req_needs_zone_write_lock(rq))
+ return true;
+ return !blk_req_zone_is_write_locked(rq);
+}
+#else
+static inline bool blk_req_needs_zone_write_lock(struct request *rq)
+{
+ return false;
+}
+
+static inline void blk_req_zone_write_lock(struct request *rq)
+{
+}
+
+static inline void blk_req_zone_write_unlock(struct request *rq)
+{
+}
+static inline bool blk_req_zone_is_write_locked(struct request *rq)
+{
+ return false;
+}
+
+static inline bool blk_req_can_dispatch_to_zone(struct request *rq)
+{
+ return true;
+}
+#endif /* CONFIG_BLK_DEV_ZONED */
+
#else /* CONFIG_BLOCK */
struct block_device;
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index ec8a4d7af6bd..fe7a22dd133b 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -125,4 +125,13 @@ static inline bool bvec_iter_rewind(const struct bio_vec *bv,
((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \
bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len))
+/* for iterating one bio from start to end */
+#define BVEC_ITER_ALL_INIT (struct bvec_iter) \
+{ \
+ .bi_sector = 0, \
+ .bi_size = UINT_MAX, \
+ .bi_idx = 0, \
+ .bi_bvec_done = 0, \
+}
+
#endif /* __LINUX_BVEC_ITER_H */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index 3d794b3dc532..6d9e230dffd2 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -198,8 +198,6 @@ extern bool elv_attempt_insert_merge(struct request_queue *, struct request *);
extern void elv_requeue_request(struct request_queue *, struct request *);
extern struct request *elv_former_request(struct request_queue *, struct request *);
extern struct request *elv_latter_request(struct request_queue *, struct request *);
-extern int elv_register_queue(struct request_queue *q);
-extern void elv_unregister_queue(struct request_queue *q);
extern int elv_may_queue(struct request_queue *, unsigned int);
extern void elv_completed_request(struct request_queue *, struct request *);
extern int elv_set_request(struct request_queue *q, struct request *rq,
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 5144ebe046c9..5e3531027b51 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -395,6 +395,11 @@ static inline void add_disk(struct gendisk *disk)
{
device_add_disk(NULL, disk);
}
+extern void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk);
+static inline void add_disk_no_queue_reg(struct gendisk *disk)
+{
+ device_add_disk_no_queue_reg(NULL, disk);
+}
extern void del_gendisk(struct gendisk *gp);
extern struct gendisk *get_gendisk(dev_t dev, int *partno);
diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
index 2d1d9de06728..7f4b60abdf27 100644
--- a/include/linux/lightnvm.h
+++ b/include/linux/lightnvm.h
@@ -50,10 +50,7 @@ struct nvm_id;
struct nvm_dev;
struct nvm_tgt_dev;
-typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *);
typedef int (nvm_id_fn)(struct nvm_dev *, struct nvm_id *);
-typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32,
- nvm_l2p_update_fn *, void *);
typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *);
typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int);
typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *);
@@ -66,7 +63,6 @@ typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t);
struct nvm_dev_ops {
nvm_id_fn *identity;
- nvm_get_l2p_tbl_fn *get_l2p_tbl;
nvm_op_bb_tbl_fn *get_bb_tbl;
nvm_op_set_bb_fn *set_bb_tbl;
@@ -112,8 +108,6 @@ enum {
NVM_RSP_WARN_HIGHECC = 0x4700,
/* Device opcodes */
- NVM_OP_HBREAD = 0x02,
- NVM_OP_HBWRITE = 0x81,
NVM_OP_PWRITE = 0x91,
NVM_OP_PREAD = 0x92,
NVM_OP_ERASE = 0x90,
@@ -165,12 +159,16 @@ struct nvm_id_group {
u8 fmtype;
u8 num_ch;
u8 num_lun;
- u8 num_pln;
- u16 num_blk;
- u16 num_pg;
- u16 fpg_sz;
+ u16 num_chk;
+ u16 clba;
u16 csecs;
u16 sos;
+
+ u16 ws_min;
+ u16 ws_opt;
+ u16 ws_seq;
+ u16 ws_per_chk;
+
u32 trdt;
u32 trdm;
u32 tprt;
@@ -181,7 +179,10 @@ struct nvm_id_group {
u32 mccap;
u16 cpar;
- struct nvm_id_lp_tbl lptbl;
+ /* 1.2 compatibility */
+ u8 num_pln;
+ u16 num_pg;
+ u16 fpg_sz;
};
struct nvm_addr_format {
@@ -217,6 +218,10 @@ struct nvm_target {
#define ADDR_EMPTY (~0ULL)
+#define NVM_TARGET_DEFAULT_OP (101)
+#define NVM_TARGET_MIN_OP (3)
+#define NVM_TARGET_MAX_OP (80)
+
#define NVM_VERSION_MAJOR 1
#define NVM_VERSION_MINOR 0
#define NVM_VERSION_PATCH 0
@@ -239,7 +244,6 @@ struct nvm_rq {
void *meta_list;
dma_addr_t dma_meta_list;
- struct completion *wait;
nvm_end_io_fn *end_io;
uint8_t opcode;
@@ -268,31 +272,38 @@ enum {
NVM_BLK_ST_BAD = 0x8, /* Bad block */
};
+
/* Device generic information */
struct nvm_geo {
+ /* generic geometry */
int nr_chnls;
- int nr_luns;
- int luns_per_chnl; /* -1 if channels are not symmetric */
- int nr_planes;
- int sec_per_pg; /* only sectors for a single page */
- int pgs_per_blk;
- int blks_per_lun;
- int fpg_size;
- int pfpg_size; /* size of buffer if all pages are to be read */
+ int all_luns; /* across channels */
+ int nr_luns; /* per channel */
+ int nr_chks; /* per lun */
+
int sec_size;
int oob_size;
int mccap;
- struct nvm_addr_format ppaf;
- /* Calculated/Cached values. These do not reflect the actual usable
- * blocks at run-time.
- */
+ int sec_per_chk;
+ int sec_per_lun;
+
+ int ws_min;
+ int ws_opt;
+ int ws_seq;
+ int ws_per_chk;
+
int max_rq_size;
- int plane_mode; /* drive device in single, double or quad mode */
+ int op;
+
+ struct nvm_addr_format ppaf;
+
+ /* Legacy 1.2 specific geometry */
+ int plane_mode; /* drive device in single, double or quad mode */
+ int nr_planes;
+ int sec_per_pg; /* only sectors for a single page */
int sec_per_pl; /* all sectors across planes */
- int sec_per_blk;
- int sec_per_lun;
};
/* sub-device structure */
@@ -320,10 +331,6 @@ struct nvm_dev {
/* Device information */
struct nvm_geo geo;
- /* lower page table */
- int lps_per_blk;
- int *lptbl;
-
unsigned long total_secs;
unsigned long *lun_map;
@@ -346,36 +353,6 @@ struct nvm_dev {
struct list_head targets;
};
-static inline struct ppa_addr linear_to_generic_addr(struct nvm_geo *geo,
- u64 pba)
-{
- struct ppa_addr l;
- int secs, pgs, blks, luns;
- sector_t ppa = pba;
-
- l.ppa = 0;
-
- div_u64_rem(ppa, geo->sec_per_pg, &secs);
- l.g.sec = secs;
-
- sector_div(ppa, geo->sec_per_pg);
- div_u64_rem(ppa, geo->pgs_per_blk, &pgs);
- l.g.pg = pgs;
-
- sector_div(ppa, geo->pgs_per_blk);
- div_u64_rem(ppa, geo->blks_per_lun, &blks);
- l.g.blk = blks;
-
- sector_div(ppa, geo->blks_per_lun);
- div_u64_rem(ppa, geo->luns_per_chnl, &luns);
- l.g.lun = luns;
-
- sector_div(ppa, geo->luns_per_chnl);
- l.g.ch = ppa;
-
- return l;
-}
-
static inline struct ppa_addr generic_to_dev_addr(struct nvm_tgt_dev *tgt_dev,
struct ppa_addr r)
{
@@ -418,25 +395,6 @@ static inline struct ppa_addr dev_to_generic_addr(struct nvm_tgt_dev *tgt_dev,
return l;
}
-static inline int ppa_empty(struct ppa_addr ppa_addr)
-{
- return (ppa_addr.ppa == ADDR_EMPTY);
-}
-
-static inline void ppa_set_empty(struct ppa_addr *ppa_addr)
-{
- ppa_addr->ppa = ADDR_EMPTY;
-}
-
-static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2)
-{
- if (ppa_empty(ppa1) || ppa_empty(ppa2))
- return 0;
-
- return ((ppa1.g.ch == ppa2.g.ch) && (ppa1.g.lun == ppa2.g.lun) &&
- (ppa1.g.blk == ppa2.g.blk));
-}
-
typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *);
typedef sector_t (nvm_tgt_capacity_fn)(void *);
typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *,
@@ -481,17 +439,10 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *,
extern int nvm_max_phys_sects(struct nvm_tgt_dev *);
extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *);
extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *);
-extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int);
-extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *,
- void *);
-extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t);
-extern void nvm_put_area(struct nvm_tgt_dev *, sector_t);
extern void nvm_end_io(struct nvm_rq *);
extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int);
extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *);
-extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int);
-
#else /* CONFIG_NVM */
struct nvm_dev_ops;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index aea87f0d917b..4112e2bd747f 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -124,14 +124,20 @@ enum {
#define NVME_CMB_BIR(cmbloc) ((cmbloc) & 0x7)
#define NVME_CMB_OFST(cmbloc) (((cmbloc) >> 12) & 0xfffff)
-#define NVME_CMB_SZ(cmbsz) (((cmbsz) >> 12) & 0xfffff)
-#define NVME_CMB_SZU(cmbsz) (((cmbsz) >> 8) & 0xf)
-
-#define NVME_CMB_WDS(cmbsz) ((cmbsz) & 0x10)
-#define NVME_CMB_RDS(cmbsz) ((cmbsz) & 0x8)
-#define NVME_CMB_LISTS(cmbsz) ((cmbsz) & 0x4)
-#define NVME_CMB_CQS(cmbsz) ((cmbsz) & 0x2)
-#define NVME_CMB_SQS(cmbsz) ((cmbsz) & 0x1)
+
+enum {
+ NVME_CMBSZ_SQS = 1 << 0,
+ NVME_CMBSZ_CQS = 1 << 1,
+ NVME_CMBSZ_LISTS = 1 << 2,
+ NVME_CMBSZ_RDS = 1 << 3,
+ NVME_CMBSZ_WDS = 1 << 4,
+
+ NVME_CMBSZ_SZ_SHIFT = 12,
+ NVME_CMBSZ_SZ_MASK = 0xfffff,
+
+ NVME_CMBSZ_SZU_SHIFT = 8,
+ NVME_CMBSZ_SZU_MASK = 0xf,
+};
/*
* Submission and Completion Queue Entry Sizes for the NVM command set.
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index b7c83254c566..22b2131bcdcd 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -276,6 +276,17 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
unsigned int n_pages, unsigned int offset,
unsigned long size, gfp_t gfp_mask);
+#ifdef CONFIG_SGL_ALLOC
+struct scatterlist *sgl_alloc_order(unsigned long long length,
+ unsigned int order, bool chainable,
+ gfp_t gfp, unsigned int *nent_p);
+struct scatterlist *sgl_alloc(unsigned long long length, gfp_t gfp,
+ unsigned int *nent_p);
+void sgl_free_n_order(struct scatterlist *sgl, int nents, int order);
+void sgl_free_order(struct scatterlist *sgl, int order);
+void sgl_free(struct scatterlist *sgl);
+#endif /* CONFIG_SGL_ALLOC */
+
size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf,
size_t buflen, off_t skip, bool to_buffer);