summaryrefslogtreecommitdiff
path: root/fs/btrfs/compression.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/compression.c')
-rw-r--r--fs/btrfs/compression.c365
1 files changed, 105 insertions, 260 deletions
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index f4564f32f6d9..e84d22c5c6a8 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -136,109 +136,14 @@ static int compression_decompress(int type, struct list_head *ws,
static int btrfs_decompress_bio(struct compressed_bio *cb);
-static inline int compressed_bio_size(struct btrfs_fs_info *fs_info,
- unsigned long disk_size)
-{
- return sizeof(struct compressed_bio) +
- (DIV_ROUND_UP(disk_size, fs_info->sectorsize)) * fs_info->csum_size;
-}
-
-static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
- u64 disk_start)
-{
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
- const u32 csum_size = fs_info->csum_size;
- const u32 sectorsize = fs_info->sectorsize;
- struct page *page;
- unsigned int i;
- char *kaddr;
- u8 csum[BTRFS_CSUM_SIZE];
- struct compressed_bio *cb = bio->bi_private;
- u8 *cb_sum = cb->sums;
-
- if ((inode->flags & BTRFS_INODE_NODATASUM) ||
- test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
- return 0;
-
- shash->tfm = fs_info->csum_shash;
-
- for (i = 0; i < cb->nr_pages; i++) {
- u32 pg_offset;
- u32 bytes_left = PAGE_SIZE;
- page = cb->compressed_pages[i];
-
- /* Determine the remaining bytes inside the page first */
- if (i == cb->nr_pages - 1)
- bytes_left = cb->compressed_len - i * PAGE_SIZE;
-
- /* Hash through the page sector by sector */
- for (pg_offset = 0; pg_offset < bytes_left;
- pg_offset += sectorsize) {
- kaddr = kmap_atomic(page);
- crypto_shash_digest(shash, kaddr + pg_offset,
- sectorsize, csum);
- kunmap_atomic(kaddr);
-
- if (memcmp(&csum, cb_sum, csum_size) != 0) {
- btrfs_print_data_csum_error(inode, disk_start,
- csum, cb_sum, cb->mirror_num);
- if (btrfs_bio(bio)->device)
- btrfs_dev_stat_inc_and_print(
- btrfs_bio(bio)->device,
- BTRFS_DEV_STAT_CORRUPTION_ERRS);
- return -EIO;
- }
- cb_sum += csum_size;
- disk_start += sectorsize;
- }
- }
- return 0;
-}
-
-/*
- * Reduce bio and io accounting for a compressed_bio with its corresponding bio.
- *
- * Return true if there is no pending bio nor io.
- * Return false otherwise.
- */
-static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
-{
- struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
- unsigned int bi_size = 0;
- bool last_io = false;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- /*
- * At endio time, bi_iter.bi_size doesn't represent the real bio size.
- * Thus here we have to iterate through all segments to grab correct
- * bio size.
- */
- bio_for_each_segment_all(bvec, bio, iter_all)
- bi_size += bvec->bv_len;
-
- if (bio->bi_status)
- cb->status = bio->bi_status;
-
- ASSERT(bi_size && bi_size <= cb->compressed_len);
- last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
- &cb->pending_sectors);
- /*
- * Here we must wake up the possible error handler after all other
- * operations on @cb finished, or we can race with
- * finish_compressed_bio_*() which may free @cb.
- */
- wake_up_var(cb);
-
- return last_io;
-}
-
static void finish_compressed_bio_read(struct compressed_bio *cb)
{
unsigned int index;
struct page *page;
+ if (cb->status == BLK_STS_OK)
+ cb->status = errno_to_blk_status(btrfs_decompress_bio(cb));
+
/* Release the compressed pages */
for (index = 0; index < cb->nr_pages; index++) {
page = cb->compressed_pages[index];
@@ -247,85 +152,63 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
}
/* Do io completion on the original bio */
- if (cb->status != BLK_STS_OK) {
+ if (cb->status != BLK_STS_OK)
cb->orig_bio->bi_status = cb->status;
- bio_endio(cb->orig_bio);
- } else {
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- /*
- * We have verified the checksum already, set page checked so
- * the end_io handlers know about it
- */
- ASSERT(!bio_flagged(cb->orig_bio, BIO_CLONED));
- bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
- u64 bvec_start = page_offset(bvec->bv_page) +
- bvec->bv_offset;
-
- btrfs_page_set_checked(btrfs_sb(cb->inode->i_sb),
- bvec->bv_page, bvec_start,
- bvec->bv_len);
- }
-
- bio_endio(cb->orig_bio);
- }
+ bio_endio(cb->orig_bio);
/* Finally free the cb struct */
kfree(cb->compressed_pages);
kfree(cb);
}
-/* when we finish reading compressed pages from the disk, we
- * decompress them and then run the bio end_io routines on the
- * decompressed pages (in the inode address space).
- *
- * This allows the checksumming and other IO error handling routines
- * to work normally
- *
- * The compressed pages are freed here, and it must be run
- * in process context
+/*
+ * Verify the checksums and kick off repair if needed on the uncompressed data
+ * before decompressing it into the original bio and freeing the uncompressed
+ * pages.
*/
static void end_compressed_bio_read(struct bio *bio)
{
struct compressed_bio *cb = bio->bi_private;
- struct inode *inode;
- unsigned int mirror = btrfs_bio(bio)->mirror_num;
- int ret = 0;
-
- if (!dec_and_test_compressed_bio(cb, bio))
- goto out;
-
- /*
- * Record the correct mirror_num in cb->orig_bio so that
- * read-repair can work properly.
- */
- btrfs_bio(cb->orig_bio)->mirror_num = mirror;
- cb->mirror_num = mirror;
-
- /*
- * Some IO in this cb have failed, just skip checksum as there
- * is no way it could be correct.
- */
- if (cb->status != BLK_STS_OK)
- goto csum_failed;
-
- inode = cb->inode;
- ret = check_compressed_csum(BTRFS_I(inode), bio,
- bio->bi_iter.bi_sector << 9);
- if (ret)
- goto csum_failed;
+ struct inode *inode = cb->inode;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_inode *bi = BTRFS_I(inode);
+ bool csum = !(bi->flags & BTRFS_INODE_NODATASUM) &&
+ !test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
+ blk_status_t status = bio->bi_status;
+ struct btrfs_bio *bbio = btrfs_bio(bio);
+ struct bvec_iter iter;
+ struct bio_vec bv;
+ u32 offset;
+
+ btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
+ u64 start = bbio->file_offset + offset;
+
+ if (!status &&
+ (!csum || !btrfs_check_data_csum(inode, bbio, offset,
+ bv.bv_page, bv.bv_offset))) {
+ clean_io_failure(fs_info, &bi->io_failure_tree,
+ &bi->io_tree, start, bv.bv_page,
+ btrfs_ino(bi), bv.bv_offset);
+ } else {
+ int ret;
+
+ refcount_inc(&cb->pending_ios);
+ ret = btrfs_repair_one_sector(inode, bbio, offset,
+ bv.bv_page, bv.bv_offset,
+ btrfs_submit_data_read_bio);
+ if (ret) {
+ refcount_dec(&cb->pending_ios);
+ status = errno_to_blk_status(ret);
+ }
+ }
+ }
- /* ok, we're the last bio for this extent, lets start
- * the decompression.
- */
- ret = btrfs_decompress_bio(cb);
+ if (status)
+ cb->status = status;
-csum_failed:
- if (ret)
- cb->status = errno_to_blk_status(ret);
- finish_compressed_bio_read(cb);
-out:
+ if (refcount_dec_and_test(&cb->pending_ios))
+ finish_compressed_bio_read(cb);
+ btrfs_bio_free_csum(bbio);
bio_put(bio);
}
@@ -403,6 +286,14 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
kfree(cb);
}
+static void btrfs_finish_compressed_write_work(struct work_struct *work)
+{
+ struct compressed_bio *cb =
+ container_of(work, struct compressed_bio, write_end_work);
+
+ finish_compressed_bio_write(cb);
+}
+
/*
* Do the cleanup once all the compressed pages hit the disk. This will clear
* writeback on the file pages and free the compressed pages.
@@ -414,29 +305,18 @@ static void end_compressed_bio_write(struct bio *bio)
{
struct compressed_bio *cb = bio->bi_private;
- if (!dec_and_test_compressed_bio(cb, bio))
- goto out;
+ if (bio->bi_status)
+ cb->status = bio->bi_status;
- btrfs_record_physical_zoned(cb->inode, cb->start, bio);
+ if (refcount_dec_and_test(&cb->pending_ios)) {
+ struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
- finish_compressed_bio_write(cb);
-out:
+ btrfs_record_physical_zoned(cb->inode, cb->start, bio);
+ queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
+ }
bio_put(bio);
}
-static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
- struct bio *bio, int mirror_num)
-{
- blk_status_t ret;
-
- ASSERT(bio->bi_iter.bi_size);
- ret = btrfs_bio_wq_end_io(fs_info, bio, BTRFS_WQ_ENDIO_DATA);
- if (ret)
- return ret;
- ret = btrfs_map_bio(fs_info, bio, mirror_num);
- return ret;
-}
-
/*
* Allocate a compressed_bio, which will be used to read/write on-disk
* (aka, compressed) * data.
@@ -455,7 +335,7 @@ static blk_status_t submit_compressed_bio(struct btrfs_fs_info *fs_info,
static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr,
- unsigned int opf, bio_end_io_t endio_func,
+ blk_opf_t opf, bio_end_io_t endio_func,
u64 *next_stripe_start)
{
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
@@ -487,7 +367,7 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
return ERR_PTR(ret);
}
*next_stripe_start = disk_bytenr + geom.len;
-
+ refcount_inc(&cb->pending_ios);
return bio;
}
@@ -505,7 +385,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned int compressed_len,
struct page **compressed_pages,
unsigned int nr_pages,
- unsigned int write_flags,
+ blk_opf_t write_flags,
struct cgroup_subsys_state *blkcg_css,
bool writeback)
{
@@ -514,26 +394,25 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
struct compressed_bio *cb;
u64 cur_disk_bytenr = disk_start;
u64 next_stripe_start;
- blk_status_t ret;
+ blk_status_t ret = BLK_STS_OK;
int skip_sum = inode->flags & BTRFS_INODE_NODATASUM;
const bool use_append = btrfs_use_zone_append(inode, disk_start);
- const unsigned int bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
+ const enum req_op bio_op = use_append ? REQ_OP_ZONE_APPEND : REQ_OP_WRITE;
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
IS_ALIGNED(len, fs_info->sectorsize));
- cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
+ cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
if (!cb)
return BLK_STS_RESOURCE;
- refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
+ refcount_set(&cb->pending_ios, 1);
cb->status = BLK_STS_OK;
cb->inode = &inode->vfs_inode;
cb->start = start;
cb->len = len;
- cb->mirror_num = 0;
cb->compressed_pages = compressed_pages;
cb->compressed_len = compressed_len;
cb->writeback = writeback;
- cb->orig_bio = NULL;
+ INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
cb->nr_pages = nr_pages;
if (blkcg_css)
@@ -554,8 +433,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
&next_stripe_start);
if (IS_ERR(bio)) {
ret = errno_to_blk_status(PTR_ERR(bio));
- bio = NULL;
- goto finish_cb;
+ break;
}
if (blkcg_css)
bio->bi_opf |= REQ_CGROUP_PUNT;
@@ -599,44 +477,25 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
if (submit) {
if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, start, true);
- if (ret)
- goto finish_cb;
+ if (ret) {
+ bio->bi_status = ret;
+ bio_endio(bio);
+ break;
+ }
}
- ret = submit_compressed_bio(fs_info, bio, 0);
- if (ret)
- goto finish_cb;
+ ASSERT(bio->bi_iter.bi_size);
+ btrfs_submit_bio(fs_info, bio, 0);
bio = NULL;
}
cond_resched();
}
- if (blkcg_css)
- kthread_associate_blkcg(NULL);
-
- return 0;
-finish_cb:
if (blkcg_css)
kthread_associate_blkcg(NULL);
- if (bio) {
- bio->bi_status = ret;
- bio_endio(bio);
- }
- /* Last byte of @cb is submitted, endio will free @cb */
- if (cur_disk_bytenr == disk_start + compressed_len)
- return ret;
-
- wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
- (disk_start + compressed_len - cur_disk_bytenr) >>
- fs_info->sectorsize_bits);
- /*
- * Even with previous bio ended, we should still have io not yet
- * submitted, thus need to finish manually.
- */
- ASSERT(refcount_read(&cb->pending_sectors));
- /* Now we are the only one referring @cb, can finish it safely. */
- finish_compressed_bio_write(cb);
+ if (refcount_dec_and_test(&cb->pending_ios))
+ finish_compressed_bio_write(cb);
return ret;
}
@@ -765,7 +624,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
int zeros;
zeros = PAGE_SIZE - zero_offset;
memzero_page(page, zero_offset, zeros);
- flush_dcache_page(page);
}
}
@@ -819,7 +677,6 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
blk_status_t ret;
int ret2;
int i;
- u8 *sums;
em_tree = &BTRFS_I(inode)->extent_tree;
@@ -837,17 +694,15 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
compressed_len = em->block_len;
- cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
+ cb = kmalloc(sizeof(struct compressed_bio), GFP_NOFS);
if (!cb) {
ret = BLK_STS_RESOURCE;
goto out;
}
- refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
+ refcount_set(&cb->pending_ios, 1);
cb->status = BLK_STS_OK;
cb->inode = inode;
- cb->mirror_num = mirror_num;
- sums = cb->sums;
cb->start = em->orig_start;
em_len = em->len;
@@ -893,9 +748,8 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
REQ_OP_READ, end_compressed_bio_read,
&next_stripe_start);
if (IS_ERR(comp_bio)) {
- ret = errno_to_blk_status(PTR_ERR(comp_bio));
- comp_bio = NULL;
- goto finish_cb;
+ cb->status = errno_to_blk_status(PTR_ERR(comp_bio));
+ break;
}
}
/*
@@ -931,22 +785,33 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
submit = true;
if (submit) {
- unsigned int nr_sectors;
+ /* Save the original iter for read repair */
+ if (bio_op(comp_bio) == REQ_OP_READ)
+ btrfs_bio(comp_bio)->iter = comp_bio->bi_iter;
- ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
- if (ret)
- goto finish_cb;
+ /*
+ * Save the initial offset of this chunk, as there
+ * is no direct correlation between compressed pages and
+ * the original file offset. The field is only used for
+ * priting error messages.
+ */
+ btrfs_bio(comp_bio)->file_offset = file_offset;
- nr_sectors = DIV_ROUND_UP(comp_bio->bi_iter.bi_size,
- fs_info->sectorsize);
- sums += fs_info->csum_size * nr_sectors;
+ ret = btrfs_lookup_bio_sums(inode, comp_bio, NULL);
+ if (ret) {
+ comp_bio->bi_status = ret;
+ bio_endio(comp_bio);
+ break;
+ }
- ret = submit_compressed_bio(fs_info, comp_bio, mirror_num);
- if (ret)
- goto finish_cb;
+ ASSERT(comp_bio->bi_iter.bi_size);
+ btrfs_submit_bio(fs_info, comp_bio, mirror_num);
comp_bio = NULL;
}
}
+
+ if (refcount_dec_and_test(&cb->pending_ios))
+ finish_compressed_bio_read(cb);
return;
fail:
@@ -964,25 +829,6 @@ out:
bio->bi_status = ret;
bio_endio(bio);
return;
-finish_cb:
- if (comp_bio) {
- comp_bio->bi_status = ret;
- bio_endio(comp_bio);
- }
- /* All bytes of @cb is submitted, endio will free @cb */
- if (cur_disk_byte == disk_bytenr + compressed_len)
- return;
-
- wait_var_event(cb, refcount_read(&cb->pending_sectors) ==
- (disk_bytenr + compressed_len - cur_disk_byte) >>
- fs_info->sectorsize_bits);
- /*
- * Even with previous bio ended, we should still have io not yet
- * submitted, thus need to finish @cb manually.
- */
- ASSERT(refcount_read(&cb->pending_sectors));
- /* Now we are the only one referring @cb, can finish it safely. */
- finish_compressed_bio_read(cb);
}
/*
@@ -1481,7 +1327,6 @@ int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
ASSERT(copy_start - decompressed < buf_len);
memcpy_to_page(bvec.bv_page, bvec.bv_offset,
buf + copy_start - decompressed, copy_len);
- flush_dcache_page(bvec.bv_page);
cur_offset += copy_len;
bio_advance(orig_bio, copy_len);