From 6a0cb1bc106fc07ce0443303bcdb7f7da5131e5c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 18 Oct 2016 15:40:33 +0900 Subject: block: Implement support for zoned block devices Implement zoned block device zone information reporting and reset. Zone information are reported as struct blk_zone. This implementation does not differentiate between host-aware and host-managed device models and is valid for both. Two functions are provided: blkdev_report_zones for discovering the zone configuration of a zoned block device, and blkdev_reset_zones for resetting the write pointer of sequential zones. The helper function blk_queue_zone_size and bdev_zone_size are also provided for, as the name suggest, obtaining the zone size (in 512B sectors) of the zones of the device. Signed-off-by: Hannes Reinecke [Damien: * Removed the zone cache * Implement report zones operation based on earlier proposal by Shaun Tancheff ] Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Shaun Tancheff Tested-by: Shaun Tancheff Signed-off-by: Jens Axboe --- include/uapi/linux/Kbuild | 1 + include/uapi/linux/blkzoned.h | 103 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 include/uapi/linux/blkzoned.h (limited to 'include/uapi') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 6965d0909554..b2166f283da9 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -70,6 +70,7 @@ header-y += bfs_fs.h header-y += binfmts.h header-y += blkpg.h header-y += blktrace_api.h +header-y += blkzoned.h header-y += bpf_common.h header-y += bpf_perf_event.h header-y += bpf.h diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h new file mode 100644 index 000000000000..a3817214b0e0 --- /dev/null +++ b/include/uapi/linux/blkzoned.h @@ -0,0 +1,103 @@ +/* + * Zoned block devices handling. + * + * Copyright (C) 2015 Seagate Technology PLC + * + * Written by: Shaun Tancheff + * + * Modified by: Damien Le Moal + * Copyright (C) 2016 Western Digital + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ +#ifndef _UAPI_BLKZONED_H +#define _UAPI_BLKZONED_H + +#include + +/** + * enum blk_zone_type - Types of zones allowed in a zoned device. + * + * @BLK_ZONE_TYPE_CONVENTIONAL: The zone has no write pointer and can be writen + * randomly. Zone reset has no effect on the zone. + * @BLK_ZONE_TYPE_SEQWRITE_REQ: The zone must be written sequentially + * @BLK_ZONE_TYPE_SEQWRITE_PREF: The zone can be written non-sequentially + * + * Any other value not defined is reserved and must be considered as invalid. + */ +enum blk_zone_type { + BLK_ZONE_TYPE_CONVENTIONAL = 0x1, + BLK_ZONE_TYPE_SEQWRITE_REQ = 0x2, + BLK_ZONE_TYPE_SEQWRITE_PREF = 0x3, +}; + +/** + * enum blk_zone_cond - Condition [state] of a zone in a zoned device. + * + * @BLK_ZONE_COND_NOT_WP: The zone has no write pointer, it is conventional. + * @BLK_ZONE_COND_EMPTY: The zone is empty. + * @BLK_ZONE_COND_IMP_OPEN: The zone is open, but not explicitly opened. + * @BLK_ZONE_COND_EXP_OPEN: The zones was explicitly opened by an + * OPEN ZONE command. + * @BLK_ZONE_COND_CLOSED: The zone was [explicitly] closed after writing. + * @BLK_ZONE_COND_FULL: The zone is marked as full, possibly by a zone + * FINISH ZONE command. + * @BLK_ZONE_COND_READONLY: The zone is read-only. + * @BLK_ZONE_COND_OFFLINE: The zone is offline (sectors cannot be read/written). + * + * The Zone Condition state machine in the ZBC/ZAC standards maps the above + * deinitions as: + * - ZC1: Empty | BLK_ZONE_EMPTY + * - ZC2: Implicit Open | BLK_ZONE_COND_IMP_OPEN + * - ZC3: Explicit Open | BLK_ZONE_COND_EXP_OPEN + * - ZC4: Closed | BLK_ZONE_CLOSED + * - ZC5: Full | BLK_ZONE_FULL + * - ZC6: Read Only | BLK_ZONE_READONLY + * - ZC7: Offline | BLK_ZONE_OFFLINE + * + * Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should + * be considered invalid. + */ +enum blk_zone_cond { + BLK_ZONE_COND_NOT_WP = 0x0, + BLK_ZONE_COND_EMPTY = 0x1, + BLK_ZONE_COND_IMP_OPEN = 0x2, + BLK_ZONE_COND_EXP_OPEN = 0x3, + BLK_ZONE_COND_CLOSED = 0x4, + BLK_ZONE_COND_READONLY = 0xD, + BLK_ZONE_COND_FULL = 0xE, + BLK_ZONE_COND_OFFLINE = 0xF, +}; + +/** + * struct blk_zone - Zone descriptor for BLKREPORTZONE ioctl. + * + * @start: Zone start in 512 B sector units + * @len: Zone length in 512 B sector units + * @wp: Zone write pointer location in 512 B sector units + * @type: see enum blk_zone_type for possible values + * @cond: see enum blk_zone_cond for possible values + * @non_seq: Flag indicating that the zone is using non-sequential resources + * (for host-aware zoned block devices only). + * @reset: Flag indicating that a zone reset is recommended. + * @reserved: Padding to 64 B to match the ZBC/ZAC defined zone descriptor size. + * + * start, len and wp use the regular 512 B sector unit, regardless of the + * device logical block size. The overall structure size is 64 B to match the + * ZBC/ZAC defined zone descriptor and allow support for future additional + * zone information. + */ +struct blk_zone { + __u64 start; /* Zone start sector */ + __u64 len; /* Zone length in number of sectors */ + __u64 wp; /* Zone write pointer position */ + __u8 type; /* Zone type */ + __u8 cond; /* Zone condition */ + __u8 non_seq; /* Non-sequential write resources active */ + __u8 reset; /* Reset write pointer recommended */ + __u8 reserved[36]; +}; + +#endif /* _UAPI_BLKZONED_H */ -- cgit v1.2.3 From 3ed05a987e0f63b21e634101e0b460d32f3581c3 Mon Sep 17 00:00:00 2001 From: Shaun Tancheff Date: Tue, 18 Oct 2016 15:40:35 +0900 Subject: blk-zoned: implement ioctls Adds the new BLKREPORTZONE and BLKRESETZONE ioctls for respectively obtaining the zone configuration of a zoned block device and resetting the write pointer of sequential zones of a zoned block device. The BLKREPORTZONE ioctl maps directly to a single call of the function blkdev_report_zones. The zone information result is passed as an array of struct blk_zone identical to the structure used internally for processing the REQ_OP_ZONE_REPORT operation. The BLKRESETZONE ioctl maps to a single call of the blkdev_reset_zones function. Signed-off-by: Shaun Tancheff Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/uapi/linux/blkzoned.h | 40 ++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/fs.h | 4 ++++ 2 files changed, 44 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index a3817214b0e0..40d1d7bff537 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -16,6 +16,7 @@ #define _UAPI_BLKZONED_H #include +#include /** * enum blk_zone_type - Types of zones allowed in a zoned device. @@ -100,4 +101,43 @@ struct blk_zone { __u8 reserved[36]; }; +/** + * struct blk_zone_report - BLKREPORTZONE ioctl request/reply + * + * @sector: starting sector of report + * @nr_zones: IN maximum / OUT actual + * @reserved: padding to 16 byte alignment + * @zones: Space to hold @nr_zones @zones entries on reply. + * + * The array of at most @nr_zones must follow this structure in memory. + */ +struct blk_zone_report { + __u64 sector; + __u32 nr_zones; + __u8 reserved[4]; + struct blk_zone zones[0]; +} __packed; + +/** + * struct blk_zone_range - BLKRESETZONE ioctl request + * @sector: starting sector of the first zone to issue reset write pointer + * @nr_sectors: Total number of sectors of 1 or more zones to reset + */ +struct blk_zone_range { + __u64 sector; + __u64 nr_sectors; +}; + +/** + * Zoned block device ioctl's: + * + * @BLKREPORTZONE: Get zone information. Takes a zone report as argument. + * The zone report will start from the zone containing the + * sector specified in the report request structure. + * @BLKRESETZONE: Reset the write pointer of the zones in the specified + * sector range. The sector range must be zone aligned. + */ +#define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report) +#define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range) + #endif /* _UAPI_BLKZONED_H */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index acb2b6152ba0..c1d11df07b28 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -225,6 +225,10 @@ struct fsxattr { #define BLKSECDISCARD _IO(0x12,125) #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) +/* + * A jump here: 130-131 are reserved for zoned block devices + * (see uapi/linux/blkzoned.h) + */ #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- cgit v1.2.3 From 9561a7ade0c205bc2ee035a2ac880478dcc1a024 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 22 Nov 2016 14:04:40 -0500 Subject: nbd: add multi-connection support NBD can become contended on its single connection. We have to serialize all writes and we can only process one read response at a time. Fix this by allowing userspace to provide multiple connections to a single nbd device. This coupled with block-mq drastically increases performance in multi-process cases. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Jens Axboe --- include/uapi/linux/nbd.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index e08e413d5f71..f063cff178c5 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -38,11 +38,12 @@ enum { }; /* values for flags field */ -#define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */ -#define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ -#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ +#define NBD_FLAG_HAS_FLAGS (1 << 0) /* nbd-server supports flags */ +#define NBD_FLAG_READ_ONLY (1 << 1) /* device is read-only */ +#define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ /* there is a gap here to match userspace */ -#define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ +#define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ +#define NBD_FLAG_CAN_MULTI_CONN (1 << 7) /* Server supports multiple connections per export. */ /* userspace doesn't need the nbd_device structure */ -- cgit v1.2.3 From 63db89eaf0b9bf856c5e079ed5eab5c3f13165c8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 22 Nov 2016 13:09:50 -0700 Subject: nbd: move multi-connection bit to unused value Bit #7 is already used, move to bit #8 which is the first unused one. Fixes: 9561a7ade0c2 ("nbd: add multi-connection support") Signed-off-by: Jens Axboe --- include/uapi/linux/nbd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h index f063cff178c5..c91c642ea900 100644 --- a/include/uapi/linux/nbd.h +++ b/include/uapi/linux/nbd.h @@ -43,7 +43,7 @@ enum { #define NBD_FLAG_SEND_FLUSH (1 << 2) /* can flush writeback cache */ /* there is a gap here to match userspace */ #define NBD_FLAG_SEND_TRIM (1 << 5) /* send trim/discard */ -#define NBD_FLAG_CAN_MULTI_CONN (1 << 7) /* Server supports multiple connections per export. */ +#define NBD_FLAG_CAN_MULTI_CONN (1 << 8) /* Server supports multiple connections per export. */ /* userspace doesn't need the nbd_device structure */ -- cgit v1.2.3 From 6ea76f33e9ab99c7888547e1acba2baf8e4b5b17 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 2 Dec 2016 00:28:38 -0800 Subject: Add type 0x28 NVME type code to scsi fc headers Signed-off-by: James Smart Acked-by: Johannes Thumshirn Reviewed-by: Jay Freyensee Signed-off-by: Christoph Hellwig --- include/uapi/scsi/fc/fc_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi') diff --git a/include/uapi/scsi/fc/fc_fs.h b/include/uapi/scsi/fc/fc_fs.h index 50f28b143451..dcf314dc2a27 100644 --- a/include/uapi/scsi/fc/fc_fs.h +++ b/include/uapi/scsi/fc/fc_fs.h @@ -190,6 +190,7 @@ enum fc_fh_type { FC_TYPE_FCP = 0x08, /* SCSI FCP */ FC_TYPE_CT = 0x20, /* Fibre Channel Services (FC-CT) */ FC_TYPE_ILS = 0x22, /* internal link service */ + FC_TYPE_NVME = 0x28, /* FC-NVME */ }; /* @@ -203,6 +204,7 @@ enum fc_fh_type { [FC_TYPE_FCP] = "FCP", \ [FC_TYPE_CT] = "CT", \ [FC_TYPE_ILS] = "ILS", \ + [FC_TYPE_NVME] = "NVME", \ } /* -- cgit v1.2.3