/*
 * Copyright (c) 2019 Tom Marshall
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>  /* cbd_params, cbd_header, zone/pblk/lblk layout helpers */

// XXX: find a better name for this, something about storage vs. speed.
// XXX: should this be in cbd_params?
// #define CBD_DETECT_ZERO_BLOCKS

/*
 * XXX
 * If we don't use a workqueue, blkdev_pblk_io() stalls. Why?
 */
#define USE_WORKQUEUE 1

#define ZONE_NONE   (u32)(~0)
#define PBLK_NONE   (u64)(~0)
#define LBLK_NONE   (u64)(~0)

/* per bio private data */
struct dm_compress_io {
    struct dm_compress* dc;
    struct bio*         bio;
    struct work_struct  work;
};

struct dm_compress {
    struct dm_dev*      dev;
    struct cbd_params   params;
    bool                io_failed;  /* XXX: remove when debugging complete */

    /* XXX: dm_target.off */
    sector_t            dm_off;

    u8*                 lz4_wrkmem;
    u8*                 lz4_cbuf;

    /* Currently cached zone pblk alloc info (if any) */
    u32                 pblk_alloc_idx;
    bool                pblk_alloc_dirty;
    void*               pblk_alloc;

    /* Currently cached zone lblk alloc info (if any) */
    u64                 lblk_alloc_pblk;
    u32                 lblk_alloc_len;
    void*               lblk_alloc;
    u64                 lblk_alloc_elem_lblk;
    struct lblk_alloc_elem* lblk_alloc_elem;

    /* Currently cached lblk data (if any) */
    u64                 lblk_num;
    bool                lblk_dirty;
    void*               lblk;

    /* Queueing stuff */
    struct workqueue_struct* io_queue;
    struct mutex        io_lock;
};

static inline int
memcmpz(const void* buf, size_t len)
{
    const char* end = (const char*)buf + len;
    const char* p;

    for (p = (const char*)buf; p < end; ++p) {
        if (*p) {
            return 1;
        }
    }

    return 0;
}

static inline u64
blkdev_pblk_size(struct block_device *bdev)
{
    return i_size_read(bdev->bd_inode) >> PBLK_SHIFT;
}

static inline u64
dm_target_pblk_size(struct dm_target* ti)
{
    return ti->len >> (PBLK_SHIFT - SECTOR_SHIFT);
}

/*************************************
 * Page level memory allocator
 *************************************/

static void*
compress_alloc_pages(size_t size)
{
    unsigned int order = get_order(size);
    void* ret;

    if (size > (PAGE_SIZE * 128) || order > 7) {
        printk(KERN_ERR "%s: size %zu order %u too large\n", __func__, size, order);
        return NULL;
    }
    ret = (void*)__get_free_pages(GFP_KERNEL, order);
    if (!ret) {
        printk(KERN_ERR "%s: failed to alloc %zu bytes\n", __func__, size);
        return NULL;
    }
    memset(ret, 0, size);

    return ret;
}

static void
compress_free_pages(void* ptr, size_t size)
{
    unsigned int order = get_order(size);
    size_t n;
    size_t in_use = 0;

    if (!ptr) {
        return;
    }
    for (n = 0; n < (1 << order); ++n) {
        struct page* pg = virt_to_page(ptr + n * PAGE_SIZE);
        int refcount = page_ref_count(pg);
        if (n == 0) {
            --refcount;
        }
        if (refcount) {
            ++in_use;
        }
    }
    if (in_use) {
        printk(KERN_ERR "%s: *** %zu of %zu pages in use ***\n", __func__, in_use, n);
        return;
    }
    free_pages((unsigned long)ptr, order);
}

/**************************************
 * Core low-level I/O.
 *
 * pblk counts are in units of physical blocks (4096 bytes), NOT sectors.
 * data is a page address (obtained via __get_free_pages and friends).
 **************************************/
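
/*
 * Worked example of the unit conversion (assuming the usual PBLK_SHIFT of 12
 * and SECTOR_SHIFT of 9, i.e. 4096-byte blocks and 512-byte sectors): one
 * pblk spans 8 sectors, so pblk N starts at sector N << 3.  That is exactly
 * the shift blkdev_pblk_io_prepare() applies when filling in bi_sector.
 */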

static struct bio*
blkdev_pblk_io_prepare(struct block_device* dev, unsigned int op,
                       u64 pblk, u32 count, void *data)
{
    unsigned long data_addr;
    struct bio* bio;

    data_addr = (unsigned long)data;
    BUG_ON(data_addr & (PAGE_SIZE-1));
    BUG_ON(!virt_addr_valid(data));

    bio = bio_alloc(GFP_KERNEL, count);
    if (!bio) {
        printk(KERN_ERR "%s: out of memory\n", __func__);
        return NULL;
    }
    bio_set_dev(bio, dev);
    bio->bi_opf = op;
    bio->bi_iter.bi_sector = (pblk << (PBLK_SHIFT - SECTOR_SHIFT));
    while (count--) {
        struct page *page = virt_to_page(data);
        if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) {
            BUG();
        }
        data = (u8*)data + PAGE_SIZE;
    }

    return bio;
}

static int
blkdev_pblk_read(struct block_device* dev, u64 pblk, u32 count, void *data)
{
    int ret;
    struct bio* bio;

    bio = blkdev_pblk_io_prepare(dev, REQ_OP_READ, pblk, count, data);
    if (!bio) {
        printk(KERN_ERR "%s: out of memory\n", __func__);
        return -ENOMEM;
    }
    ret = submit_bio_wait(bio);
    if (ret != 0) {
        printk(KERN_ERR "%s: submit_bio_wait failed: %d\n", __func__, ret);
    }
    bio_put(bio);

    return ret;
}

static int
blkdev_pblk_write(struct block_device* dev, u64 pblk, u32 count, void *data)
{
    int ret;
    struct bio* bio;

    bio = blkdev_pblk_io_prepare(dev, REQ_OP_WRITE, pblk, count, data);
    if (!bio) {
        printk(KERN_ERR "%s: out of memory\n", __func__);
        return -ENOMEM;
    }
    /* XXX: Make writes asynchronous. */
    ret = submit_bio_wait(bio);
    if (ret != 0) {
        printk(KERN_ERR "%s: submit_bio_wait failed: %d\n", __func__, ret);
    }
    bio_put(bio);

    return ret;
}

/**************************************
 * Zone pblk functions
 **************************************/

static int
pblk_alloc_write(struct dm_compress* dc)
{
    int ret;
    u64 pblk;
    u32 count;

    BUG_ON(dc->pblk_alloc_idx == ZONE_NONE);
    pblk = pblk_alloc_off(&dc->params, dc->pblk_alloc_idx);
    count = pblk_alloc_len(&dc->params);
    ret = blkdev_pblk_write(dc->dev->bdev, pblk, count, dc->pblk_alloc);
    if (ret != 0) {
        return ret;
    }
    dc->pblk_alloc_dirty = false;

    return 0;
}

static int
pblk_alloc_flush(struct dm_compress* dc)
{
    int ret;

    if (dc->pblk_alloc_dirty) {
        ret = pblk_alloc_write(dc);
        if (ret) {
            return ret;
        }
    }

    return 0;
}

/* Read zone physical block alloc bitmap */
static int
pblk_alloc_read(struct dm_compress* dc, u32 idx)
{
    int ret;
    u64 pblk;
    u32 count;

    if (dc->pblk_alloc_idx == idx) {
        return 0;
    }
    ret = pblk_alloc_flush(dc);
    if (ret != 0) {
        return ret;
    }
    pblk = pblk_alloc_off(&dc->params, idx);
    count = pblk_alloc_len(&dc->params);
    ret = blkdev_pblk_read(dc->dev->bdev, pblk, count, dc->pblk_alloc);
    if (ret) {
        return ret;
    }
    dc->pblk_alloc_idx = idx;

    return 0;
}

/*
 * Get (allocate) one pblk from the currently cached zone pblk alloc bitmap.
 *
 * XXX: get rid of this function and use pblk_alloc directly in lblk_write().
 */
static u64
pblk_alloc_get(struct dm_compress* dc, u32 zone_hint)
{
    u32 zone_pblk_count = pblk_alloc_len(&dc->params) * PBLK_SIZE_BITS;
    u32 zone;
    u32 idx;

    zone = zone_hint;
    /* XXX: check both forward and backward */
    do {
        if (pblk_alloc_read(dc, zone) != 0) {
            printk(KERN_ERR " pblk_alloc_read failed\n");
            return 0;
        }
        idx = cbd_bitmap_alloc(dc->pblk_alloc, zone_pblk_count);
        if (idx != zone_pblk_count) {
            dc->pblk_alloc_dirty = true;
            return zone_data_off(&dc->params, zone) + idx;
        }
        ++zone;
        if (zone == dc->params.nr_zones) {
            zone = 0;
        }
    } while (zone != zone_hint);

    printk(KERN_ERR "%s: fail, all zones full\n", __func__);

    return 0;
}
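
/*
 * Note on the pblk -> (zone, index) mapping used below: data pblks are
 * handed out as zone_data_off(zone) + idx, so pblk_alloc_put() recovers the
 * zone from (pblk - CBD_HEADER_BLOCKS) / zone_len() and the bitmap index
 * from pblk - zone_data_off(zone), rejecting anything that falls inside the
 * header or the zone metadata.
 */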

/*
 * Put (free) one pblk into the currently cached zone pblk alloc bitmap.
 *
 * XXX: get rid of this function and use pblk_free directly in lblk_write().
 */
static int
pblk_alloc_put(struct dm_compress* dc, u64 pblk)
{
    u32 zone_pblk_count = pblk_alloc_len(&dc->params) * PBLK_SIZE_BITS;
    u32 zone;
    u32 idx;
    int ret;

    if (pblk < CBD_HEADER_BLOCKS) {
        printk(KERN_ERR "%s: pblk index is in header\n", __func__);
        return -EINVAL;
    }
    zone = (pblk - CBD_HEADER_BLOCKS) / zone_len(&dc->params);
    if (zone >= dc->params.nr_zones) {
        printk(KERN_ERR "%s: pblk zone out of bounds\n", __func__);
        return -EINVAL;
    }
    if (pblk < zone_data_off(&dc->params, zone)) {
        printk(KERN_ERR "%s: pblk index in metadata\n", __func__);
        return -EINVAL;
    }
    idx = pblk - zone_data_off(&dc->params, zone);
    if (idx >= zone_pblk_count) {
        printk(KERN_ERR "%s: pblk index out of bounds\n", __func__);
        return -EINVAL;
    }
    ret = pblk_alloc_read(dc, zone);
    if (ret != 0) {
        return ret;
    }
    cbd_bitmap_free(dc->pblk_alloc, idx);
    dc->pblk_alloc_dirty = true;

    return 0;
}

/**************************************
 * Zone lblk functions
 **************************************/

static int
lblk_alloc_elem_write(struct dm_compress* dc)
{
    int ret;
    u32 zone;
    u32 zone_lblk;
    u32 elem_off;
    u32 elem_end;
    u32 rel_pblk;
    u32 count;
    u64 pblk;
    u8* buf;

    BUG_ON(dc->lblk_alloc_elem_lblk == LBLK_NONE);
    BUG_ON(dc->lblk_alloc_pblk == PBLK_NONE);
    BUG_ON(dc->lblk_alloc_len == 0);

    zone = dc->lblk_alloc_elem_lblk / dc->params.lblk_per_zone;
    zone_lblk = dc->lblk_alloc_elem_lblk - (zone * dc->params.lblk_per_zone);
    elem_off = lblk_alloc_elem_len(&dc->params) * zone_lblk;
    elem_end = elem_off + lblk_alloc_elem_len(&dc->params);
    rel_pblk = elem_off / PBLK_SIZE;
    count = dc->lblk_alloc_len;
    pblk = dc->lblk_alloc_pblk;

    buf = dc->lblk_alloc + (elem_off - rel_pblk * PBLK_SIZE);
    lblk_alloc_elem_put(&dc->params, buf, dc->lblk_alloc_elem);
    ret = blkdev_pblk_write(dc->dev->bdev, pblk, count, dc->lblk_alloc);

    return ret;
}

static int
lblk_alloc_elem_read(struct dm_compress* dc, u64 lblk)
{
    int ret;
    u32 zone;
    u32 zone_lblk;
    u32 elem_off;
    u32 elem_end;
    u32 rel_pblk;
    u32 count;
    u64 pblk;
    u8* buf;

    if (dc->lblk_alloc_elem_lblk == lblk) {
        return 0;
    }

    zone = lblk / dc->params.lblk_per_zone;
    zone_lblk = lblk - (zone * dc->params.lblk_per_zone);
    elem_off = lblk_alloc_elem_len(&dc->params) * zone_lblk;
    elem_end = elem_off + lblk_alloc_elem_len(&dc->params);
    rel_pblk = elem_off / PBLK_SIZE;
    count = 1 + (elem_end - 1) / PBLK_SIZE - (elem_off / PBLK_SIZE);
    pblk = lblk_alloc_off(&dc->params, zone) + rel_pblk;

    if (dc->lblk_alloc_pblk != pblk || dc->lblk_alloc_len < count) {
        ret = blkdev_pblk_read(dc->dev->bdev, pblk, count, dc->lblk_alloc);
        if (ret != 0) {
            return ret;
        }
        dc->lblk_alloc_pblk = pblk;
        dc->lblk_alloc_len = count;
    }
    buf = dc->lblk_alloc + (elem_off - rel_pblk * PBLK_SIZE);
    lblk_alloc_elem_get(&dc->params, buf, dc->lblk_alloc_elem);
    dc->lblk_alloc_elem_lblk = lblk;

    return 0;
}

/**************************************
 * Logical block functions
 **************************************/

/*
 * Compress dc->lblk into dc->lz4_cbuf
 *
 * Returns number of bytes in cbuf or 0 for failure.
 */
static size_t
lblk_compress(struct dm_compress* dc)
{
    int ret;
    void *dbuf = dc->lblk;
    u32 dlen = PBLK_SIZE * lblk_per_pblk(&dc->params);
    void *cbuf = dc->lz4_cbuf;
    u32 clen = PBLK_SIZE * lblk_per_pblk(&dc->params);

    ret = LZ4_compress_default(dbuf, cbuf, dlen, clen, dc->lz4_wrkmem);
    if (ret <= 0) {
        return 0;
    }

    return (size_t)ret;
}
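
/*
 * Note: the compression output buffer is the same size as the input
 * (lblk_per_pblk() pblks), so LZ4_compress_default() fails whenever the data
 * does not shrink.  lblk_write() treats that as "store uncompressed" by
 * recording CBD_UNCOMPRESSED as the element length and writing dc->lblk raw.
 */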

/*
 * Decompress dc->lz4_cbuf of size clen into dc->lblk
 *
 * Returns 0 for success, <0 for failure.
 */
static int
lblk_decompress(struct dm_compress* dc, u32 clen)
{
    int ret;
    void *cbuf = dc->lz4_cbuf;
    void *dbuf = dc->lblk;
    u32 dlen = PBLK_SIZE * lblk_per_pblk(&dc->params);

    ret = LZ4_decompress_safe(cbuf, dbuf, clen, dlen);
    if (ret != dlen) {
        printk(KERN_ERR "%s: failed, ret=%d (expected %u)\n", __func__, ret, (unsigned int)dlen);
        return -1;
    }

    return 0;
}

static int
lblk_write(struct dm_compress* dc)
{
    int ret;
    u32 zone;
    u32 zone_lblk;
    u8* elem_buf;
    size_t d_len;
    size_t c_len;
    u8* c_buf;
    u32 n;
    u64 pblk;

    zone = dc->lblk_num / dc->params.lblk_per_zone;
    zone_lblk = dc->lblk_num - (zone * dc->params.lblk_per_zone);
    elem_buf = dc->lblk_alloc + zone_lblk * lblk_alloc_elem_len(&dc->params);

    /* We must have a cached lblk elem */
    BUG_ON(dc->lblk_alloc_elem_lblk == LBLK_NONE);

    d_len = PBLK_SIZE * lblk_per_pblk(&dc->params);
#ifdef CBD_DETECT_ZERO_BLOCKS
    if (memcmpz(dc->lblk, d_len) == 0) {
#else
    if (0) {
#endif
        c_len = 0;
        c_buf = NULL;
        dc->lblk_alloc_elem->len = 0;
    }
    else {
        c_len = lblk_compress(dc);
        if (c_len > 0) {
            c_buf = dc->lz4_cbuf;
            dc->lblk_alloc_elem->len = c_len;
        }
        else {
            c_len = d_len;
            c_buf = dc->lblk;
            dc->lblk_alloc_elem->len = CBD_UNCOMPRESSED;
        }
    }
    for (n = 0; n < lblk_per_pblk(&dc->params); ++n) {
        if (c_len > PBLK_SIZE * n) {
            pblk = dc->lblk_alloc_elem->pblk[n];
            if (!pblk) {
                pblk = pblk_alloc_get(dc, zone);
                if (pblk == 0) {
                    printk(KERN_ERR " pblk_alloc_get failed\n");
                    return -ENOSPC;
                }
                dc->lblk_alloc_elem->pblk[n] = pblk;
            }
            blkdev_pblk_write(dc->dev->bdev, pblk, 1, c_buf);
            c_buf += PBLK_SIZE;
        }
        else {
            pblk = dc->lblk_alloc_elem->pblk[n];
            if (pblk) {
                dc->lblk_alloc_elem->pblk[n] = 0;
                ret = pblk_alloc_put(dc, pblk);
                if (ret != 0) {
                    printk(KERN_ERR " pblk_alloc_put failed\n");
                    return ret;
                }
            }
        }
    }

    ret = lblk_alloc_elem_write(dc);
    if (ret != 0) {
        printk(KERN_ERR " lblk_alloc_elem_write failed\n");
        return ret;
    }
    ret = pblk_alloc_flush(dc);
    if (ret != 0) {
        printk(KERN_ERR " pblk_alloc_flush failed\n");
        return ret;
    }
    dc->lblk_dirty = false;

    return 0;
}

static int
lblk_flush(struct dm_compress* dc)
{
    int ret;

    if (dc->lblk_dirty) {
        ret = lblk_write(dc);
        if (ret) {
            return ret;
        }
    }

    return 0;
}

static int
lblk_read(struct dm_compress* dc, u64 idx)
{
    int ret;
    u32 zone;
    u32 zone_lblk;
    u8* elem_buf;
    u32 c_len;
    u64 pblk;

    if (dc->lblk_num == idx) {
        return 0;
    }
    ret = lblk_flush(dc);
    if (ret) {
        return ret;
    }

    zone = idx / dc->params.lblk_per_zone;
    zone_lblk = idx - (zone * dc->params.lblk_per_zone);
    elem_buf = dc->lblk_alloc + zone_lblk * lblk_alloc_elem_len(&dc->params);

    ret = lblk_alloc_elem_read(dc, idx);
    if (ret != 0) {
        printk(KERN_ERR " lblk_alloc_elem_read failed\n");
        return ret;
    }

    c_len = dc->lblk_alloc_elem->len;
    if (c_len == 0) {
        memset(dc->lblk, 0, PBLK_SIZE * lblk_per_pblk(&dc->params));
    }
    else {
        bool is_compressed = true;
        size_t d_len = PBLK_SIZE * lblk_per_pblk(&dc->params);
        size_t n;
        u8* p;

        if (c_len == CBD_UNCOMPRESSED) {
            is_compressed = false;
            c_len = d_len;
        }
        p = dc->lz4_cbuf;
        for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
            pblk = dc->lblk_alloc_elem->pblk[n];
            BUG_ON(pblk == 0);
            ret = blkdev_pblk_read(dc->dev->bdev, pblk, 1, p);
            if (ret != 0) {
                return ret;
            }
        }
        if (is_compressed) {
            if (lblk_decompress(dc, c_len) != 0) {
                printk(KERN_ERR " decompress failed\n");
                return -1;
            }
        }
        else {
            memcpy(dc->lblk, dc->lz4_cbuf, d_len);
        }
    }
    dc->lblk_num = idx;

    return 0;
}
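
/*
 * Read/write path summary: lblk_read() and lblk_write() operate on a single
 * cached logical block (dc->lblk), flushing the previously cached block via
 * lblk_flush() before a different one is loaded.  All pblk transfers go
 * through the synchronous helpers above, and the caller (compress_io)
 * serializes everything under dc->io_lock.
 */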

/**************************************
 * Main functions
 **************************************/

static void
compress_free_buffers(struct dm_compress* dc)
{
    compress_free_pages(dc->lblk, PBLK_SIZE * lblk_per_pblk(&dc->params));
    dc->lblk = NULL;

    kfree(dc->lblk_alloc_elem);
    dc->lblk_alloc_elem = NULL;

    compress_free_pages(dc->lblk_alloc, PBLK_SIZE * 2);
    dc->lblk_alloc = NULL;

    compress_free_pages(dc->pblk_alloc, PBLK_SIZE * pblk_alloc_len(&dc->params));
    dc->pblk_alloc = NULL;

    compress_free_pages(dc->lz4_cbuf, PBLK_SIZE * lblk_per_pblk(&dc->params));
    dc->lz4_cbuf = NULL;

    kfree(dc->lz4_wrkmem);
    dc->lz4_wrkmem = NULL;
}

/*
 * XXX: Many of the below (all except lz4 buffers) are used in bio operations
 * and should be page aligned. We always get page aligned buffers because of
 * the way kmalloc() works, but that is technically not guaranteed.
 */
static int
compress_alloc_buffers(struct dm_compress* dc)
{
    dc->lz4_wrkmem = kmalloc(LZ4_compressBound(PBLK_SIZE * lblk_per_pblk(&dc->params)), GFP_KERNEL);
    if (!dc->lz4_wrkmem) {
        printk(KERN_ERR "%s: Failed to alloc lz4_wrkmem\n", __func__);
        goto out_nomem;
    }

    dc->lz4_cbuf = compress_alloc_pages(PBLK_SIZE * lblk_per_pblk(&dc->params));
    if (!dc->lz4_cbuf) {
        printk(KERN_ERR "%s: Failed to alloc lz4_cbuf\n", __func__);
        goto out_nomem;
    }

    dc->pblk_alloc_idx = ZONE_NONE;
    dc->pblk_alloc_dirty = false;
    dc->pblk_alloc = compress_alloc_pages(PBLK_SIZE * pblk_alloc_len(&dc->params));
    if (!dc->pblk_alloc) {
        printk(KERN_ERR "%s: Failed to alloc pblk_alloc\n", __func__);
        goto out_nomem;
    }

    dc->lblk_alloc_pblk = PBLK_NONE;
    dc->lblk_alloc_len = 0;
    dc->lblk_alloc = compress_alloc_pages(PBLK_SIZE * 2);
    if (!dc->lblk_alloc) {
        printk(KERN_ERR "%s: Failed to alloc lblk_alloc\n", __func__);
        goto out_nomem;
    }

    dc->lblk_alloc_elem_lblk = LBLK_NONE;
    dc->lblk_alloc_elem = kmalloc(offsetof(struct lblk_alloc_elem, pblk[lblk_per_pblk(&dc->params)]), GFP_KERNEL);
    if (!dc->lblk_alloc_elem) {
        printk(KERN_ERR "%s: Failed to alloc lblk_alloc_elem\n", __func__);
        goto out_nomem;
    }

    dc->lblk_num = LBLK_NONE;
    dc->lblk_dirty = false;
    dc->lblk = compress_alloc_pages(PBLK_SIZE * lblk_per_pblk(&dc->params));
    if (!dc->lblk) {
        printk(KERN_ERR "%s: Failed to alloc lblk\n", __func__);
        goto out_nomem;
    }

    return 0;

out_nomem:
    compress_free_buffers(dc);
    return -ENOMEM;
}
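
/*
 * compress_open() below reads pblk 0, unpacks the on-disk cbd_header, and
 * sanity-checks it (magic, major/minor version, lblk_shift range, nr_pblk
 * and nr_zones against the actual device size) before the cached buffers
 * and the I/O workqueue are set up.
 */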
static int
compress_open(struct dm_compress* dc, u64 dev_nr_pblks)
{
    int err;
    u8 *pblkbuf;
    struct cbd_header header;
    u64 max_nr_zones;

    pblkbuf = kmalloc(PBLK_SIZE, GFP_KERNEL);
    if (!pblkbuf) {
        return -ENOMEM;
    }
    err = blkdev_pblk_read(dc->dev->bdev, 0, 1, pblkbuf);
    if (err) {
        printk(KERN_ERR "%s: failed to read header\n", __func__);
        goto out;
    }
    cbd_header_get(pblkbuf, &header);
    if (memcmp(header.magic, CBD_MAGIC, sizeof(header.magic)) != 0) {
        printk(KERN_ERR "%s: bad magic\n", __func__);
        err = -EINVAL;
        goto out;
    }
    if (header.version_major != CBD_VERSION_MAJOR) {
        printk(KERN_ERR "%s: bad version\n", __func__);
        err = -EINVAL;
        goto out;
    }
    if (header.version_minor != CBD_VERSION_MINOR) {
        printk(KERN_ERR "%s: bad version\n", __func__);
        err = -EINVAL;
        goto out;
    }
    if (header.params.lblk_shift < LBLK_SHIFT_MIN ||
        header.params.lblk_shift > LBLK_SHIFT_MAX) {
        printk(KERN_ERR "%s: bad lblk_shift\n", __func__);
        err = -EINVAL;
        goto out;
    }
    /* XXX: validate minimum pblk using zone_off(max_zone+1) */
    if (header.params.nr_pblk > dev_nr_pblks) {
        printk(KERN_ERR "%s: bad nr_pblk\n", __func__);
        err = -EINVAL;
        goto out;
    }
    max_nr_zones = (dev_nr_pblks - CBD_HEADER_BLOCKS) / zone_len(&header.params);
    if (header.params.nr_zones > max_nr_zones) {
        printk(KERN_ERR "%s: bad nr_zones\n", __func__);
        err = -EINVAL;
        goto out;
    }
    /* XXX: validate lblk_per_zone */
    printk(KERN_INFO "%s: parameters...\n", __func__);
    printk(KERN_INFO " algorithm=%hu\n", (unsigned short)header.params.algorithm);
    printk(KERN_INFO " compression=%hu\n", (unsigned short)header.params.compression);
    printk(KERN_INFO " lblk_shift=%hu\n", (unsigned short)header.params.lblk_shift);
    printk(KERN_INFO " nr_pblk=%lu\n", (unsigned long)header.params.nr_pblk);
    printk(KERN_INFO " nr_zones=%u\n", (unsigned int)header.params.nr_zones);
    printk(KERN_INFO " lblk_per_zone=%u\n", (unsigned int)header.params.lblk_per_zone);

    memcpy(&dc->params, &header.params, sizeof(header.params));

    err = compress_alloc_buffers(dc);
    if (err) {
        printk(KERN_ERR "%s: failed to alloc buffers\n", __func__);
        goto out;
    }

    dc->io_queue = alloc_workqueue("kcompress_io", WQ_HIGHPRI | WQ_MEM_RECLAIM, 1);
    if (!dc->io_queue) {
        printk(KERN_ERR "%s: failed to alloc io_queue\n", __func__);
        compress_free_buffers(dc);
        err = -ENOMEM;
        goto out;
    }

    mutex_init(&dc->io_lock);

out:
    kfree(pblkbuf);

    return err;
}

static int
compress_read(struct dm_compress *dc, struct bio *bio)
{
    struct bio_vec bv;
    struct bvec_iter iter;
    int ret;
    u32 lblk_per_sector = lblk_per_pblk(&dc->params) * PBLK_PER_SECTOR;
    u32 lblk_len = lblk_per_sector * SECTOR_SIZE;

    bio_for_each_segment(bv, bio, iter) {
        sector_t lblk = iter.bi_sector / lblk_per_sector;
        u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
        unsigned long flags;
        char* data;

        /* Ensure the data is within the logical block */
        if (lblk_off + bv.bv_len > lblk_len) {
            printk(KERN_ERR "%s: logical block bounds exceeded\n", __func__);
            return -EIO;
        }
        /* BUG_ON(lblk_off + bv.bv_offset + bv.bv_len > PBLK_SIZE + lblk_per_pblk(dc)); */
        ret = lblk_read(dc, lblk);
        if (ret) {
            return ret;
        }
        data = bvec_kmap_irq(&bv, &flags);
        memcpy(data, dc->lblk + lblk_off, bv.bv_len);
        bvec_kunmap_irq(data, &flags);
    }

    return 0;
}

static int
compress_write(struct dm_compress *dc, struct bio *bio)
{
    struct bio_vec bv;
    struct bvec_iter iter;
    int ret;
    u32 lblk_per_sector = lblk_per_pblk(&dc->params) * PBLK_PER_SECTOR;
    u32 lblk_len = lblk_per_sector * SECTOR_SIZE;

    bio_for_each_segment(bv, bio, iter) {
        sector_t lblk = iter.bi_sector / lblk_per_sector;
        u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
        unsigned long flags;
        char* data;

        /* Ensure the data is within the logical block */
        if (lblk_off + bv.bv_len > lblk_len) {
            printk(KERN_ERR "%s: logical block bounds exceeded\n", __func__);
            printk(KERN_ERR " sector=%lu\n", (unsigned long)iter.bi_sector);
            printk(KERN_ERR " bv_len=%u bv_offset=%u\n", bv.bv_len, bv.bv_offset);
            printk(KERN_ERR " lblk=%lu lblk_off=%u\n", (unsigned long)lblk, lblk_off);
            return -EIO;
        }
        /* BUG_ON(lblk_off + bv.bv_offset + bv.bv_len > PBLK_SIZE + lblk_per_pblk(dc)); */
        ret = lblk_read(dc, lblk);
        if (ret) {
            return ret;
        }
        data = bvec_kmap_irq(&bv, &flags);
        memcpy(dc->lblk + lblk_off, data, bv.bv_len);
        bvec_kunmap_irq(data, &flags);
        dc->lblk_dirty = true;
    }

    ret = lblk_flush(dc);
    if (ret) {
        return ret;
    }

    return 0;
}
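
/*
 * compress_io() runs from the workqueue (see compress_map).  It serializes
 * all reads and writes under dc->io_lock, since both paths share the single
 * cached lblk and the allocation caches, and then completes the bio with
 * BLK_STS_OK or BLK_STS_IOERR.
 */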
static void
compress_io(struct dm_compress_io* io)
{
    int ret;
    struct dm_compress* dc = io->dc;
    struct bio* bio = io->bio;

    if (dc->io_failed) {
        bio->bi_status = BLK_STS_IOERR; /* XXX */
        bio_endio(bio);
        return;
    }

    mutex_lock(&dc->io_lock);
    switch (bio_op(bio)) {
    case REQ_OP_READ:
        ret = compress_read(dc, bio);
        break;
    case REQ_OP_WRITE:
        ret = compress_write(dc, bio);
        break;
    default:
        printk(KERN_ERR "%s: unknown op in bio: %u\n", __func__, bio_op(bio));
        ret = -EINVAL;
    }
    if (ret) {
        printk(KERN_ERR "%s: failed, ret=%d\n", __func__, ret);
        dc->io_failed = true;
    }
    mutex_unlock(&dc->io_lock);

    bio->bi_status = (ret == 0 ? BLK_STS_OK : BLK_STS_IOERR); /* XXX */
    bio_endio(bio);
}

#ifdef USE_WORKQUEUE
static void
compress_io_work(struct work_struct *work)
{
    struct dm_compress_io *io = container_of(work, struct dm_compress_io, work);

    compress_io(io);
}
#endif

/*
 * Usage:
 *   echo "<start_sector> <end_sector> compress <backing_device> <args>" | dmsetup create <compress_name>
 * Where:
 *   start_sector is the starting sector of the backing device.
 *   end_sector is the ending sector of the backing device.
 *   compress is the name of this module.
 *   backing_device is the name of the backing device.
 *   args is:
 *     create [lblk_shift=#]
 *     open
 *   compress_name is the name of the compress device.
 */
static int
compress_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
    int err;
    unsigned int argn;
    struct dm_compress *dc = NULL;
    u64 dev_nr_pblks;

    printk(KERN_INFO "%s: enter: argc=%u\n", __func__, argc);
    for (argn = 0; argn < argc; ++argn) {
        printk(KERN_INFO " ... arg[%u]=\"%s\"\n", argn, argv[argn]);
    }

    if (argc == 0) {
        ti->error = "No device specified";
        return -EINVAL;
    }

    argn = 1;
    while (argn < argc) {
        const char* arg = argv[argn++];
        const char* eq = strchr(arg, '=');
        if (!eq) {
            ti->error = "Invalid argument format";
            return -EINVAL;
        }
#if 0
        if (!memcmp(arg, "verbose", 7)) {
            err = kstrtouint(eq + 1, 0, &verbose_level);
            if (err) {
                ti->error = "Failed to parse verbose";
                return -EINVAL;
            }
            continue;
        }
#endif
        ti->error = "Unrecognized argument";
        return -EINVAL;
    }

    dc = kzalloc(sizeof(struct dm_compress), GFP_KERNEL);
    if (!dc) {
        ti->error = "Failed to allocate target";
        return -ENOMEM;
    }

    if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dc->dev)) {
        ti->error = "Device lookup failed";
        kfree(dc);
        return -EINVAL;
    }

    dc->dm_off = ti->begin;
    ti->private = dc;

    dev_nr_pblks = dm_target_pblk_size(ti);

    if (get_order(dev_nr_pblks) >= 48) {
        ti->error = "Device too large";
        dm_put_device(ti, dc->dev);
        kfree(dc);
        return -EINVAL;
    }

    ti->per_io_data_size = ALIGN(sizeof(struct dm_compress_io), ARCH_KMALLOC_MINALIGN);

    err = compress_open(dc, dev_nr_pblks);
    if (err) {
        dm_put_device(ti, dc->dev);
        kfree(dc);
        return err;
    }

    printk(KERN_INFO "%s: success\n", __func__);

    return 0;
}

static void
compress_dtr(struct dm_target *ti)
{
    struct dm_compress *dc;

    printk(KERN_INFO "%s: enter\n", __func__);

    dc = (struct dm_compress *)ti->private;
    compress_free_buffers(dc);
    if (dc->io_queue) {
        destroy_workqueue(dc->io_queue);
    }
    dm_put_device(ti, dc->dev);
    kfree(dc);
}
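
/*
 * compress_map() passes flush/discard requests straight through to the
 * backing device and queues everything else to the per-target workqueue,
 * because doing the synchronous pblk I/O directly in the map callback
 * deadlocks (see the USE_WORKQUEUE note at the top of the file).
 */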
static int
compress_map(struct dm_target *ti, struct bio *bio)
{
    struct dm_compress *dc = (struct dm_compress *)ti->private;
    struct dm_compress_io *io;

    /* from dm-crypt.c */
    if (unlikely(bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)) {
        bio_set_dev(bio, dc->dev->bdev);
        if (bio_sectors(bio)) {
            /* XXX: remap to underlying data */
        }
        return DM_MAPIO_REMAPPED;
    }

    /* Synchronous I/O operations deadlock, so queue them. */
    /* XXX: clone the bio? */
    io = dm_per_bio_data(bio, ti->per_io_data_size);
    io->dc = dc;
    io->bio = bio;

#ifdef USE_WORKQUEUE
    INIT_WORK(&io->work, compress_io_work);
    queue_work(dc->io_queue, &io->work);
#else
    compress_io(io);
#endif

    return DM_MAPIO_SUBMITTED;
}

static struct target_type compress_target = {
    .name    = "compress",
    .version = { 1, 0, 0 },
    .module  = THIS_MODULE,
    .ctr     = compress_ctr,
    .dtr     = compress_dtr,
    .map     = compress_map,
};

static int __init
dm_compress_init(void)
{
    int res;

    res = dm_register_target(&compress_target);
    if (res < 0) {
        printk(KERN_ERR "Failed to register dm-compress: %d\n", res);
    }

    return res;
}

static void __exit
dm_compress_exit(void)
{
    dm_unregister_target(&compress_target);
}

module_init(dm_compress_init);
module_exit(dm_compress_exit);

MODULE_DESCRIPTION("compress target for transparent compression");
MODULE_AUTHOR("Tom Marshall ");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");