/*
 * Copyright (c) 2019 Tom Marshall
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/device-mapper.h>
#include <linux/lz4.h>

#include "dm-compress.h"    /* cbd on-disk format helpers (local header name assumed) */

// XXX: find a better name for this, something about storage vs. speed.
// XXX: should this be in cbd_params?
// #define CBD_DETECT_ZERO_BLOCKS

/*
 * XXX
 * If we don't use a workqueue, blkdev_pblk_io() stalls.  Why?
 */
#define USE_WORKQUEUE 1

#define ZONE_NONE (u32)(~0)
#define PBLK_NONE (u64)(~0)
#define LBLK_NONE (u64)(~0)

/* per bio private data */
struct dm_compress_io {
    struct dm_compress* dc;
    struct bio*         bio;
    struct work_struct  work;
};

struct zone_cache {
    u32                 zone;
    struct mutex        lock;

    /* Compression working memory */
    u8*                 lz4_wrkmem;
    u8*                 lz4_cbuf;

    /* Currently cached zone pbat (if any) */
    bool                pbat_data_cached;
    bool                pbat_data_dirty;
    void*               pbat_buf;

    /* Currently cached zone lblk alloc info (if any) */
    u64                 lbat_data_pblk;
    u32                 lbat_data_len;
    void*               lbat_buf;
    u64                 lbat_elem_lblk;
    struct lbat_elem*   lbat_elem;

    /* Currently cached lblk data (if any) */
    u64                 lblk_num;
    bool                lblk_dirty;
    void*               lblk;
};

struct dm_compress {
    struct dm_dev*      dev;
    bool                io_failed;

    struct cbd_params   params;

    struct mutex        zc_lock;
    unsigned int        nr_zc;
    struct zone_cache*  zcache;

    /* Queueing stuff */
    struct workqueue_struct* io_queue;
};

/* Forward decls */
static struct zone_cache* zone_cache_get(struct dm_compress*, u32);
static int zone_cache_put(struct dm_compress*, struct zone_cache*);

static inline int memcmpz(const void* buf, size_t len)
{
    const char* end = (const char*)buf + len;
    const char* p;

    for (p = (const char*)buf; p < end; ++p) {
        if (*p) {
            return 1;
        }
    }

    return 0;
}

static inline u64 blkdev_pblk_size(struct block_device *bdev)
{
    return i_size_read(bdev->bd_inode) >> PBLK_SHIFT;
}

static inline u64 dm_target_pblk_size(struct dm_target* ti)
{
    return ti->len >> (PBLK_SHIFT - SECTOR_SHIFT);
}

/**************************************
 * Page level memory allocator
 **************************************/

static void* compress_alloc_pages(size_t size)
{
    unsigned int order = get_order(size);
    void* ret;

    if (size > (PAGE_SIZE * 128) || order > 7) {
        printk(KERN_ERR "%s: size %zu order %u too large\n", __func__, size, order);
        return NULL;
    }
    ret = (void*)__get_free_pages(GFP_KERNEL, order);
    if (!ret) {
        printk(KERN_ERR "%s: failed to alloc %zu bytes\n", __func__, size);
        return NULL;    /* don't fall through and memset a NULL pointer */
    }
    memset(ret, 0, size);

    return ret;
}

static void compress_free_pages(void* ptr, size_t size)
{
    unsigned int order = get_order(size);
    size_t n;
    size_t in_use = 0;

    if (!ptr) {
        return;
    }
    for (n = 0; n < (1 << order); ++n) {
        struct page* pg = virt_to_page(ptr + n * PAGE_SIZE);
        int refcount = page_ref_count(pg);
        if (n == 0) {
            --refcount;
        }
        if (refcount) {
            ++in_use;
        }
    }
    if (in_use) {
        printk(KERN_ERR "%s: *** %zu of %zu pages in use ***\n", __func__, in_use, n);
        return;
    }

    free_pages((unsigned long)ptr, order);
}

/**************************************
 * Core low-level I/O.
 *
 * pblk counts are in units of physical blocks (4096 bytes), NOT sectors.
 * data is a page address (obtained via __get_free_pages and friends).
 **************************************/
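/*
 * For reference, the unit conversion used below (assuming the usual
 * PBLK_SHIFT == 12 and SECTOR_SHIFT == 9, i.e. 8 sectors per physical
 * block): pblk 10 starts at sector 10 << 3 == 80, which is byte offset
 * 10 * 4096 on the backing device.
 */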
static struct bio* blkdev_pblk_io_prepare(struct dm_compress* dc, unsigned int op,
                                          u64 pblk, u32 count, void *data)
{
    unsigned long data_addr;
    struct bio* bio;

    data_addr = (unsigned long)data;
    BUG_ON(data_addr & (PAGE_SIZE-1));
    BUG_ON(!virt_addr_valid(data));

    bio = bio_alloc(GFP_KERNEL, count);
    if (!bio) {
        printk(KERN_ERR "%s: out of memory\n", __func__);
        return NULL;
    }
    bio_set_dev(bio, dc->dev->bdev);
    bio->bi_opf = op;
    bio->bi_iter.bi_sector = (pblk << (PBLK_SHIFT - SECTOR_SHIFT));
    while (count--) {
        struct page *page = virt_to_page(data);
        if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) {
            BUG();
        }
        data = (u8*)data + PAGE_SIZE;
    }

    return bio;
}

static int blkdev_pblk_read(struct dm_compress* dc, u64 pblk, u32 count, void *data)
{
    int ret;
    struct bio* bio;

    bio = blkdev_pblk_io_prepare(dc, REQ_OP_READ, pblk, count, data);
    if (!bio) {
        printk(KERN_ERR "%s: out of memory\n", __func__);
        return -ENOMEM;
    }

    ret = submit_bio_wait(bio);
    if (ret != 0) {
        printk(KERN_ERR "%s: submit_bio_wait failed: %d\n", __func__, ret);
    }
    bio_put(bio);

    return ret;
}

static void blkdev_pblk_write_endio(struct bio* bio)
{
    void* data = page_address(bio->bi_io_vec[0].bv_page);
    unsigned int count = bio->bi_max_vecs;

    /* compress_free_pages() takes a size in bytes, matching the allocation */
    compress_free_pages(data, count * PBLK_SIZE);
    if (bio->bi_status != BLK_STS_OK) {
        struct dm_compress* dc = bio->bi_private;

        dc->io_failed = true;
    }
    bio_put(bio);
}

static void blkdev_pblk_write(struct dm_compress* dc, u64 pblk, u32 count, void *data)
{
    struct bio* bio;

    bio = blkdev_pblk_io_prepare(dc, REQ_OP_WRITE, pblk, count, data);
    if (!bio) {
        printk(KERN_ERR "%s: out of memory\n", __func__);
        return;
    }
    bio->bi_end_io = blkdev_pblk_write_endio;
    bio->bi_private = dc;
    submit_bio(bio);
}

/**************************************
 * Zone pblk functions
 **************************************/

static int pbat_write(struct dm_compress* dc, struct zone_cache* zc)
{
    u64 pblk;
    u32 count;
    void* pg;

    BUG_ON(!zc->pbat_data_cached);

    pblk = pbat_off(&dc->params, zc->zone);
    count = pbat_len(&dc->params);
    /* Allocate enough for the whole pbat, which may span multiple pblks */
    pg = compress_alloc_pages(count * PBLK_SIZE);
    if (!pg) {
        return -ENOMEM;
    }
    memcpy(pg, zc->pbat_buf, count * PBLK_SIZE);
    blkdev_pblk_write(dc, pblk, count, pg);
    zc->pbat_data_dirty = false;

    return 0;
}

static int pbat_flush(struct dm_compress* dc, struct zone_cache* zc)
{
    int ret = 0;

    if (zc->pbat_data_dirty) {
        ret = pbat_write(dc, zc);
    }

    return ret;
}

/* Read zone physical block alloc bitmap */
static int pbat_read(struct dm_compress* dc, struct zone_cache* zc)
{
    int ret;
    u64 pblk;
    u32 count;

    if (zc->pbat_data_cached) {
        return 0;
    }
    ret = pbat_flush(dc, zc);
    if (ret) {
        return ret;
    }
    pblk = pbat_off(&dc->params, zc->zone);
    count = pbat_len(&dc->params);
    ret = blkdev_pblk_read(dc, pblk, count, zc->pbat_buf);
    if (ret) {
        return ret;
    }
    zc->pbat_data_cached = true;

    return 0;
}
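/*
 * Physical block allocation: __pbat_alloc() hands out one free block from a
 * single zone's bitmap and marks the cached pbat dirty.  pbat_alloc() tries
 * the hint zone first, then walks outward through neighbouring zones in both
 * directions until a zone with free space is found, returning an absolute
 * pblk number (zone_data_off() + index) or PBLK_NONE when the device is full.
 */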
static u64 __pbat_alloc(struct dm_compress* dc, struct zone_cache* zc)
{
    u32 pblk_count = pbat_len(&dc->params) * PBLK_SIZE_BITS;
    u32 idx;

    if (pbat_read(dc, zc) != 0) {
        printk(KERN_ERR "%s: pbat_read failed\n", __func__);
        return PBLK_NONE;
    }
    idx = cbd_bitmap_alloc(zc->pbat_buf, pblk_count);
    if (idx == pblk_count) {
        return PBLK_NONE;
    }
    zc->pbat_data_dirty = true;

    return idx;
}

static u64 pbat_alloc(struct dm_compress* dc, struct zone_cache* zc_hint)
{
    u64 pblk;
    u32 zone_off;
    struct zone_cache* zc;

    pblk = __pbat_alloc(dc, zc_hint);
    if (pblk != PBLK_NONE) {
        return zone_data_off(&dc->params, zc_hint->zone) + pblk;
    }

    for (zone_off = 1;
         zone_off <= zc_hint->zone || zc_hint->zone + zone_off < dc->params.nr_zones;
         ++zone_off) {
        if (zone_off <= zc_hint->zone) {
            zc = zone_cache_get(dc, zc_hint->zone - zone_off);
            if (zc) {
                pblk = __pbat_alloc(dc, zc);
                if (zone_cache_put(dc, zc) != 0) {
                    return PBLK_NONE;
                }
                if (pblk != PBLK_NONE) {
                    return zone_data_off(&dc->params, zc->zone) + pblk;
                }
            }
        }
        if (zc_hint->zone + zone_off < dc->params.nr_zones) {
            zc = zone_cache_get(dc, zc_hint->zone + zone_off);
            if (zc) {
                pblk = __pbat_alloc(dc, zc);
                if (zone_cache_put(dc, zc) != 0) {
                    return PBLK_NONE;
                }
                if (pblk != PBLK_NONE) {
                    return zone_data_off(&dc->params, zc->zone) + pblk;
                }
            }
        }
    }

    printk(KERN_ERR "%s: fail, all zones full\n", __func__);
    return PBLK_NONE;
}

/*
 * XXX: When fine grained locking is in place, we won't need the zc param
 * or the put_zone flag.
 */
static int pbat_free(struct dm_compress* dc, struct zone_cache* zc, u64 pblk)
{
    u32 zone_pblk_count = pbat_len(&dc->params) * PBLK_SIZE_BITS;
    bool put_zone = false;
    u32 zone;
    u32 idx;
    int ret;

    if (pblk < CBD_HEADER_BLOCKS) {
        printk(KERN_ERR "%s: pblk index is in header\n", __func__);
        return -EINVAL;
    }
    zone = (pblk - CBD_HEADER_BLOCKS) / zone_len(&dc->params);
    if (zone >= dc->params.nr_zones) {
        printk(KERN_ERR "%s: pblk zone out of bounds\n", __func__);
        return -EINVAL;
    }
    if (pblk < zone_data_off(&dc->params, zone)) {
        printk(KERN_ERR "%s: pblk index in metadata\n", __func__);
        return -EINVAL;
    }
    idx = pblk - zone_data_off(&dc->params, zone);
    if (idx >= zone_pblk_count) {
        printk(KERN_ERR "%s: pblk index out of bounds\n", __func__);
        return -EINVAL;
    }

    if (zone != zc->zone) {
        zc = zone_cache_get(dc, zone);
        if (!zc) {
            /* zone_cache_get() can fail to find a usable slot */
            return -EBUSY;
        }
        put_zone = true;
    }
    ret = pbat_read(dc, zc);
    if (ret) {
        goto out_put;
    }
    cbd_bitmap_free(zc->pbat_buf, idx);
    zc->pbat_data_dirty = true;

out_put:
    if (put_zone) {
        zone_cache_put(dc, zc);
    }

    return ret;
}

/**************************************
 * Zone lblk functions
 **************************************/

static int lbat_elem_write(struct dm_compress* dc, struct zone_cache* zc)
{
    u32 zone;
    u32 zone_lblk;
    u32 elem_off;
    u32 elem_end;
    u32 rel_pblk;
    u32 count;
    u64 pblk;
    u8* buf;
    void* pg;

    BUG_ON(zc->lbat_elem_lblk == LBLK_NONE);
    BUG_ON(zc->lbat_data_pblk == PBLK_NONE);
    BUG_ON(zc->lbat_data_len == 0);

    zone = zc->lbat_elem_lblk / dc->params.lblk_per_zone;
    zone_lblk = zc->lbat_elem_lblk - (zc->zone * dc->params.lblk_per_zone);
    elem_off = lbat_elem_len(&dc->params) * zone_lblk;
    elem_end = elem_off + lbat_elem_len(&dc->params);
    rel_pblk = elem_off / PBLK_SIZE;
    count = zc->lbat_data_len;
    pblk = zc->lbat_data_pblk;
    buf = zc->lbat_buf + (elem_off - rel_pblk * PBLK_SIZE);
    lbat_elem_put(&dc->params, buf, zc->lbat_elem);
    pg = compress_alloc_pages(count * PBLK_SIZE);
    if (!pg) {
        return -ENOMEM;
    }
    memcpy(pg, zc->lbat_buf, count * PBLK_SIZE);
    blkdev_pblk_write(dc, pblk, count, pg);

    return 0;
}
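/*
 * An lbat element occupies lbat_elem_len() bytes within the zone's logical
 * block allocation table and may straddle a physical block boundary.  For
 * example (assuming lbat_elem_len() == 12 and PBLK_SIZE == 4096), the element
 * for zone_lblk 341 spans bytes 4092..4103 of the table, so rel_pblk == 0 and
 * the read below covers count == 1 + (4104 - 1) / 4096 - (4092 / 4096) == 2
 * physical blocks.
 */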
static int lbat_elem_read(struct dm_compress* dc, struct zone_cache* zc, u64 lblk)
{
    int ret;
    u32 zone;
    u32 zone_lblk;
    u32 elem_off;
    u32 elem_end;
    u32 rel_pblk;
    u32 count;
    u64 pblk;
    u8* buf;

    if (zc->lbat_elem_lblk == lblk) {
        return 0;
    }
    zone = lblk / dc->params.lblk_per_zone;
    zone_lblk = lblk - (zone * dc->params.lblk_per_zone);
    elem_off = lbat_elem_len(&dc->params) * zone_lblk;
    elem_end = elem_off + lbat_elem_len(&dc->params);
    rel_pblk = elem_off / PBLK_SIZE;
    count = 1 + (elem_end - 1) / PBLK_SIZE - (elem_off / PBLK_SIZE);
    pblk = lbat_off(&dc->params, zone) + rel_pblk;
    if (zc->lbat_data_pblk != pblk || zc->lbat_data_len < count) {
        ret = blkdev_pblk_read(dc, pblk, count, zc->lbat_buf);
        if (ret != 0) {
            return ret;
        }
        zc->lbat_data_pblk = pblk;
        zc->lbat_data_len = count;
    }
    buf = zc->lbat_buf + (elem_off - rel_pblk * PBLK_SIZE);
    lbat_elem_get(&dc->params, buf, zc->lbat_elem);
    zc->lbat_elem_lblk = lblk;

    return 0;
}

/**************************************
 * Logical block functions
 **************************************/

static inline bool lblk_is_zeros(struct cbd_params* params, struct zone_cache* zc)
{
#ifdef CBD_DETECT_ZERO_BLOCKS
    u32 off;
    u32 len = PBLK_SIZE * lblk_per_pblk(params);
    const u8* buf = zc->lblk;   /* zc->lblk is a void*, so scan through a byte pointer */

    for (off = 0; off < len; ++off) {
        if (buf[off]) {
            return false;
        }
    }
    return true;
#else
    return false;
#endif
}

/*
 * Compress zc->lblk into zc->lz4_cbuf
 *
 * Returns number of bytes in cbuf or 0 for failure.
 */
static size_t lblk_compress(struct cbd_params* params, struct zone_cache* zc)
{
    int ret;
    void *dbuf = zc->lblk;
    u32 dlen = PBLK_SIZE * lblk_per_pblk(params);
    void *cbuf = zc->lz4_cbuf;
    u32 clen = PBLK_SIZE * lblk_per_pblk(params);

    ret = LZ4_compress_default(dbuf, cbuf, dlen, clen, zc->lz4_wrkmem);
    if (ret <= 0) {
        return 0;
    }

    return (size_t)ret;
}

/*
 * Decompress zc->lz4_cbuf of size clen into zc->lblk
 *
 * Returns 0 for success, <0 for failure.
 */
static int lblk_decompress(struct cbd_params* params, struct zone_cache* zc, u32 clen)
{
    int ret;
    void *cbuf = zc->lz4_cbuf;
    void *dbuf = zc->lblk;
    u32 dlen = PBLK_SIZE * lblk_per_pblk(params);

    ret = LZ4_decompress_safe(cbuf, dbuf, clen, dlen);
    if (ret != dlen) {
        printk(KERN_ERR "%s: failed, ret=%d (expected %u)\n",
               __func__, ret, (unsigned int)dlen);
        return -1;
    }

    return 0;
}
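/*
 * On-disk encoding of a logical block, as used by lblk_write()/lblk_read()
 * below: lbat_elem->len == 0 means the block is all zeros and owns no
 * physical blocks, len == CBD_UNCOMPRESSED means the data is stored raw in
 * lblk_per_pblk() physical blocks, and any other value is the LZ4-compressed
 * length, rounded up to whole physical blocks for storage.
 */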
static int lblk_write(struct dm_compress* dc, struct zone_cache* zc)
{
    int ret;
    u32 zone;
    u32 zone_lblk;
    size_t d_len;
    size_t c_len;
    u8* c_buf;
    u32 n;
    u64 pblk;

    zone = zc->zone;
    zone_lblk = zc->lblk_num - (zone * dc->params.lblk_per_zone);

    /* We must have a cached lblk elem */
    BUG_ON(zc->lbat_elem_lblk == LBLK_NONE);

    d_len = PBLK_SIZE * lblk_per_pblk(&dc->params);
    if (lblk_is_zeros(&dc->params, zc)) {
        c_len = 0;
        c_buf = NULL;
        zc->lbat_elem->len = 0;
    }
    else {
        c_len = lblk_compress(&dc->params, zc);
        if (c_len > 0) {
            size_t c_blkrem = c_len % PBLK_SIZE;

            if (c_blkrem) {
                /* Zero the tail of the last partial block */
                memset(zc->lz4_cbuf + c_len, 0, PBLK_SIZE - c_blkrem);
            }
            c_buf = zc->lz4_cbuf;
            zc->lbat_elem->len = c_len;
        }
        else {
            c_len = d_len;
            c_buf = zc->lblk;
            zc->lbat_elem->len = CBD_UNCOMPRESSED;
        }
    }
    for (n = 0; n < lblk_per_pblk(&dc->params); ++n) {
        if (c_len > PBLK_SIZE * n) {
            void* pg;

            pblk = zc->lbat_elem->pblk[n];
            if (!pblk) {
                pblk = pbat_alloc(dc, zc);
                if (pblk == PBLK_NONE) {
                    printk(KERN_ERR "    pbat_alloc failed\n");
                    return -ENOSPC;
                }
                zc->lbat_elem->pblk[n] = pblk;
            }
            pg = compress_alloc_pages(PBLK_SIZE);
            if (!pg) {
                return -ENOMEM;
            }
            memcpy(pg, c_buf, PBLK_SIZE);
            blkdev_pblk_write(dc, pblk, 1, pg);
            c_buf += PBLK_SIZE;
        }
        else {
            pblk = zc->lbat_elem->pblk[n];
            if (pblk) {
                zc->lbat_elem->pblk[n] = 0;
                ret = pbat_free(dc, zc, pblk);
                if (ret != 0) {
                    printk(KERN_ERR "    pbat_free failed\n");
                    return ret;
                }
            }
        }
    }
    ret = lbat_elem_write(dc, zc);
    if (ret != 0) {
        printk(KERN_ERR "    lbat_elem_write failed\n");
        return ret;
    }
    ret = pbat_flush(dc, zc);
    if (ret != 0) {
        printk(KERN_ERR "    pbat_flush failed\n");
        return ret;
    }
    zc->lblk_dirty = false;

    return 0;
}

static int lblk_flush(struct dm_compress* dc, struct zone_cache* zc)
{
    int ret;

    if (zc->lblk_dirty) {
        ret = lblk_write(dc, zc);
        if (ret) {
            return ret;
        }
    }

    return 0;
}

static int lblk_read(struct dm_compress* dc, struct zone_cache* zc, u64 idx)
{
    int ret;
    u32 zone;
    u32 zone_lblk;
    u32 c_len;
    u64 pblk;

    if (zc->lblk_num == idx) {
        return 0;
    }
    ret = lblk_flush(dc, zc);
    if (ret) {
        return ret;
    }

    zone = idx / dc->params.lblk_per_zone;
    zone_lblk = idx - (zone * dc->params.lblk_per_zone);
    ret = lbat_elem_read(dc, zc, idx);
    if (ret != 0) {
        printk(KERN_ERR "    lbat_elem_read failed\n");
        return ret;
    }
    c_len = zc->lbat_elem->len;
    if (c_len == 0) {
        memset(zc->lblk, 0, PBLK_SIZE * lblk_per_pblk(&dc->params));
    }
    else {
        bool is_compressed = true;
        size_t d_len = PBLK_SIZE * lblk_per_pblk(&dc->params);
        size_t n;
        u8* p;

        if (c_len == CBD_UNCOMPRESSED) {
            is_compressed = false;
            c_len = d_len;
        }
        p = zc->lz4_cbuf;
        for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
            pblk = zc->lbat_elem->pblk[n];
            BUG_ON(pblk == 0);
            ret = blkdev_pblk_read(dc, pblk, 1, p);
            if (ret != 0) {
                return ret;
            }
        }
        if (is_compressed) {
            if (lblk_decompress(&dc->params, zc, c_len) != 0) {
                printk(KERN_ERR "    decompress failed\n");
                return -1;
            }
        }
        else {
            memcpy(zc->lblk, zc->lz4_cbuf, d_len);
        }
    }
    zc->lblk_num = idx;

    return 0;
}

/**************************************
 * Zone cache functions
 **************************************/
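/*
 * Each zone_cache slot holds the cached state for one zone: the pbat bitmap,
 * a small window of the lbat, one decoded lbat element, and one decompressed
 * logical block.  zone_cache_get() first looks for a slot already bound to
 * the requested zone, then for an unused slot, and finally tries to steal any
 * slot that is not currently locked; zone_cache_put() flushes dirty state and
 * releases the slot's lock.
 */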
static void zone_cache_reset(struct zone_cache* zc, u32 zone)
{
    zc->zone = zone;
    zc->pbat_data_cached = false;
    zc->pbat_data_dirty = false;
    zc->lbat_data_pblk = PBLK_NONE;
    zc->lbat_data_len = 0;
    zc->lbat_elem_lblk = LBLK_NONE;
    zc->lblk_num = LBLK_NONE;
    zc->lblk_dirty = false;
}

static int zone_cache_flush(struct dm_compress* dc, struct zone_cache* zc)
{
    int ret;

    ret = lblk_flush(dc, zc);
    if (ret) {
        return ret;
    }
    ret = pbat_flush(dc, zc);
    if (ret) {
        return ret;
    }

    return 0;
}

static struct zone_cache* zone_cache_get(struct dm_compress* dc, u32 zone)
{
    struct zone_cache* zc;
    u32 idx;

    //printk(KERN_INFO "%s: zone=%u\n", __func__, (unsigned int)zone);
    mutex_lock(&dc->zc_lock);
    for (idx = 0; idx < dc->nr_zc; ++idx) {
        zc = &dc->zcache[idx];
        if (zc->zone == zone) {
            mutex_lock(&zc->lock);
            goto out;
        }
    }
    for (idx = 0; idx < dc->nr_zc; ++idx) {
        zc = &dc->zcache[idx];
        if (zc->zone == ZONE_NONE) {
            zone_cache_reset(zc, zone);
            mutex_lock(&zc->lock);
            goto out;
        }
    }
    for (idx = 0; idx < dc->nr_zc; ++idx) {
        zc = &dc->zcache[idx];
        if (mutex_trylock(&zc->lock) == 1) {
            zone_cache_reset(zc, zone);
            goto out;
        }
    }
    printk(KERN_ERR "%s: Cannot get zone %u\n", __func__, (unsigned int)zone);
    zc = NULL;

out:
    mutex_unlock(&dc->zc_lock);
    return zc;
}

static int zone_cache_put(struct dm_compress* dc, struct zone_cache* zc)
{
    int ret;

    //printk(KERN_INFO "%s: zone=%u\n", __func__, (unsigned int)zc->zone);
    ret = zone_cache_flush(dc, zc);
    mutex_unlock(&zc->lock);

    return ret;
}

static void zone_cache_dtr(struct dm_compress* dc, struct zone_cache* zc)
{
    compress_free_pages(zc->lblk, PBLK_SIZE * lblk_per_pblk(&dc->params));
    zc->lblk = NULL;
    kfree(zc->lbat_elem);
    zc->lbat_elem = NULL;
    compress_free_pages(zc->lbat_buf, PBLK_SIZE * 2);
    zc->lbat_buf = NULL;
    compress_free_pages(zc->pbat_buf, PBLK_SIZE * pbat_len(&dc->params));
    zc->pbat_buf = NULL;
    compress_free_pages(zc->lz4_cbuf, PBLK_SIZE * lblk_per_pblk(&dc->params));
    zc->lz4_cbuf = NULL;
    kfree(zc->lz4_wrkmem);
    zc->lz4_wrkmem = NULL;
}

static int zone_cache_ctr(struct dm_compress* dc, struct zone_cache* zc)
{
    zc->zone = ZONE_NONE;
    mutex_init(&zc->lock);

    zc->lz4_wrkmem = kmalloc(LZ4_compressBound(PBLK_SIZE * lblk_per_pblk(&dc->params)), GFP_KERNEL);
    if (!zc->lz4_wrkmem) {
        printk(KERN_ERR "%s: Failed to alloc lz4_wrkmem\n", __func__);
        goto out_nomem;
    }
    zc->lz4_cbuf = compress_alloc_pages(PBLK_SIZE * lblk_per_pblk(&dc->params));
    if (!zc->lz4_cbuf) {
        printk(KERN_ERR "%s: Failed to alloc lz4_cbuf\n", __func__);
        goto out_nomem;
    }
    zc->pbat_data_cached = false;
    zc->pbat_data_dirty = false;
    zc->pbat_buf = compress_alloc_pages(PBLK_SIZE * pbat_len(&dc->params));
    if (!zc->pbat_buf) {
        printk(KERN_ERR "%s: Failed to alloc pbat_buf\n", __func__);
        goto out_nomem;
    }
    zc->lbat_data_pblk = PBLK_NONE;
    zc->lbat_data_len = 0;
    zc->lbat_buf = compress_alloc_pages(PBLK_SIZE * 2);
    if (!zc->lbat_buf) {
        printk(KERN_ERR "%s: Failed to alloc lbat_buf\n", __func__);
        goto out_nomem;
    }
    zc->lbat_elem_lblk = LBLK_NONE;
    zc->lbat_elem = kmalloc(offsetof(struct lbat_elem, pblk[lblk_per_pblk(&dc->params)]), GFP_KERNEL);
    if (!zc->lbat_elem) {
        printk(KERN_ERR "%s: Failed to alloc lbat_elem\n", __func__);
        goto out_nomem;
    }
    zc->lblk_num = LBLK_NONE;
    zc->lblk_dirty = false;
    zc->lblk = compress_alloc_pages(PBLK_SIZE * lblk_per_pblk(&dc->params));
    if (!zc->lblk) {
        printk(KERN_ERR "%s: Failed to alloc lblk\n", __func__);
        goto out_nomem;
    }

    return 0;

out_nomem:
    zone_cache_dtr(dc, zc);
    return -ENOMEM;
}

/**************************************
 * Main functions
 **************************************/

static int compress_open(struct dm_compress* dc, u64 dev_nr_pblks)
{
    int err;
    u8 *pblkbuf;
    struct cbd_header header;
    u64 max_nr_zones;
    unsigned int n;

    pblkbuf = kmalloc(PBLK_SIZE, GFP_KERNEL);
    if (!pblkbuf) {
        return -ENOMEM;
    }
    err = blkdev_pblk_read(dc, 0, 1, pblkbuf);
    if (err) {
        printk(KERN_ERR "%s: failed to read header\n", __func__);
        goto out;
    }
    cbd_header_get(pblkbuf, &header);
    if (memcmp(header.magic, CBD_MAGIC, sizeof(header.magic)) != 0) {
        printk(KERN_ERR "%s: bad magic\n", __func__);
        err = -EINVAL;
        goto out;
    }
    if (header.version_major != CBD_VERSION_MAJOR) {
        printk(KERN_ERR "%s: bad major version\n", __func__);
        err = -EINVAL;
        goto out;
    }
    if (header.version_minor != CBD_VERSION_MINOR) {
        printk(KERN_ERR "%s: bad minor version\n", __func__);
        err = -EINVAL;
        goto out;
    }
    if (header.params.lblk_shift < LBLK_SHIFT_MIN ||
        header.params.lblk_shift > LBLK_SHIFT_MAX) {
        printk(KERN_ERR "%s: bad lblk_shift\n", __func__);
        err = -EINVAL;
        goto out;
    }
    /* XXX: validate minimum pblk using zone_off(max_zone+1) */
    if (header.params.nr_pblk > dev_nr_pblks) {
        printk(KERN_ERR "%s: bad nr_pblk\n", __func__);
        err = -EINVAL;
        goto out;
    }
    max_nr_zones = (dev_nr_pblks - CBD_HEADER_BLOCKS) / zone_len(&header.params);
    if (header.params.nr_zones > max_nr_zones) {
        printk(KERN_ERR "%s: bad nr_zones\n", __func__);
        err = -EINVAL;
        goto out;
    }
    /* XXX: validate lblk_per_zone */
    printk(KERN_INFO "%s: parameters...\n", __func__);
    printk(KERN_INFO "    algorithm=%hu\n", (unsigned short)header.params.algorithm);
    printk(KERN_INFO "    compression=%hu\n", (unsigned short)header.params.compression);
    printk(KERN_INFO "    lblk_shift=%hu\n", (unsigned short)header.params.lblk_shift);
    printk(KERN_INFO "    nr_pblk=%lu\n", (unsigned long)header.params.nr_pblk);
    printk(KERN_INFO "    nr_zones=%u\n", (unsigned int)header.params.nr_zones);
    printk(KERN_INFO "    lblk_per_zone=%u\n", (unsigned int)header.params.lblk_per_zone);

    memcpy(&dc->params, &header.params, sizeof(header.params));

    mutex_init(&dc->zc_lock);
    dc->nr_zc = min(2 * num_online_cpus(), dc->params.nr_zones);
    /* kzalloc so a failed zone_cache_ctr() never frees uninitialized pointers */
    dc->zcache = kzalloc(dc->nr_zc * sizeof(struct zone_cache), GFP_KERNEL);
    if (!dc->zcache) {
        printk(KERN_ERR "%s: out of memory\n", __func__);
        err = -ENOMEM;
        goto out;
    }
    for (n = 0; n < dc->nr_zc; ++n) {
        err = zone_cache_ctr(dc, &dc->zcache[n]);
        if (err) {
            printk(KERN_ERR "%s: failed to init zone cache\n", __func__);
            goto out;
        }
    }

    dc->io_queue = alloc_workqueue("kcompress_io", WQ_HIGHPRI | WQ_MEM_RECLAIM, 1);
    if (!dc->io_queue) {
        printk(KERN_ERR "%s: failed to alloc io_queue\n", __func__);
        err = -ENOMEM;
        goto out;
    }

out:
    /* XXX: cleanup on error */
    kfree(pblkbuf);
    return err;
}
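/*
 * Bio handling: compress_read()/compress_write() walk each bio segment,
 * convert the segment's starting sector to a logical block number and a byte
 * offset within that block, take the zone cache for the owning zone, and copy
 * between the bio page and the cached decompressed block.  Writes only dirty
 * the cached block; lblk_write() compresses and writes it back when the zone
 * cache is flushed via zone_cache_put().
 */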
static int compress_read(struct dm_compress *dc, struct bio *bio)
{
    struct bio_vec bv;
    struct bvec_iter iter;
    int ret;
    u32 lblk_per_sector = lblk_per_pblk(&dc->params) * PBLK_PER_SECTOR;  /* sectors per logical block */
    u32 lblk_len = lblk_per_sector * SECTOR_SIZE;

    bio_for_each_segment(bv, bio, iter) {
        sector_t lblk = iter.bi_sector / lblk_per_sector;
        u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
        u32 zone = lblk / dc->params.lblk_per_zone;
        struct zone_cache* zc = NULL;
        unsigned long flags;
        char* data;

        /* Ensure the data is within the logical block */
        if (lblk_off + bv.bv_len > lblk_len) {
            printk(KERN_ERR "%s: logical block bounds exceeded\n", __func__);
            return -EIO;
        }
        zc = zone_cache_get(dc, zone);
        if (!zc) {
            return -EIO;
        }
        /* BUG_ON(lblk_off + bv.bv_offset + bv.bv_len > PBLK_SIZE + lblk_per_pblk(dc)); */
        ret = lblk_read(dc, zc, lblk);
        if (ret) {
            zone_cache_put(dc, zc);
            return ret;
        }
        data = bvec_kmap_irq(&bv, &flags);
        memcpy(data, zc->lblk + lblk_off, bv.bv_len);
        bvec_kunmap_irq(data, &flags);
        zone_cache_put(dc, zc);
    }

    return 0;
}

static int compress_write(struct dm_compress *dc, struct bio *bio)
{
    struct bio_vec bv;
    struct bvec_iter iter;
    int ret;
    u32 lblk_per_sector = lblk_per_pblk(&dc->params) * PBLK_PER_SECTOR;  /* sectors per logical block */
    u32 lblk_len = lblk_per_sector * SECTOR_SIZE;

    bio_for_each_segment(bv, bio, iter) {
        sector_t lblk = iter.bi_sector / lblk_per_sector;
        u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
        u32 zone = lblk / dc->params.lblk_per_zone;
        struct zone_cache* zc = NULL;
        unsigned long flags;
        char* data;

        /* Ensure the data is within the logical block */
        if (lblk_off + bv.bv_len > lblk_len) {
            printk(KERN_ERR "%s: logical block bounds exceeded\n", __func__);
            printk(KERN_ERR "    sector=%lu\n", (unsigned long)iter.bi_sector);
            printk(KERN_ERR "    bv_len=%u bv_offset=%u\n", bv.bv_len, bv.bv_offset);
            printk(KERN_ERR "    lblk=%lu lblk_off=%u\n", (unsigned long)lblk, lblk_off);
            return -EIO;
        }
        zc = zone_cache_get(dc, zone);
        if (!zc) {
            return -EIO;
        }
        /* BUG_ON(lblk_off + bv.bv_offset + bv.bv_len > PBLK_SIZE + lblk_per_pblk(dc)); */
        ret = lblk_read(dc, zc, lblk);
        if (ret) {
            zone_cache_put(dc, zc);
            return ret;
        }
        data = bvec_kmap_irq(&bv, &flags);
        memcpy(zc->lblk + lblk_off, data, bv.bv_len);
        bvec_kunmap_irq(data, &flags);
        zc->lblk_dirty = true;
        zone_cache_put(dc, zc);
    }

    return 0;
}

static void compress_io(struct dm_compress_io* io)
{
    int ret;
    struct dm_compress* dc = io->dc;
    struct bio* bio = io->bio;

    switch (bio_op(bio)) {
    case REQ_OP_READ:
        ret = compress_read(dc, bio);
        break;
    case REQ_OP_WRITE:
        ret = compress_write(dc, bio);
        break;
    default:
        printk(KERN_ERR "%s: unknown op in bio: %u\n", __func__, bio_op(bio));
        ret = -EINVAL;
    }
    if (ret) {
        printk(KERN_ERR "%s: failed, ret=%d\n", __func__, ret);
    }

    bio->bi_status = (ret == 0 ? BLK_STS_OK : BLK_STS_IOERR);   /* XXX */
    bio_endio(bio);
}

#ifdef USE_WORKQUEUE
static void compress_io_work(struct work_struct *work)
{
    struct dm_compress_io *io = container_of(work, struct dm_compress_io, work);

    compress_io(io);
}
#endif

/*
 * Usage:
 *   echo "<start_sector> <end_sector> compress <backing_device> <args>" | dmsetup create <compress_name>
 * Where:
 *   start_sector is the starting sector of the backing device.
 *   end_sector is the ending sector of the backing device.
 *   compress is the name of this module.
 *   backing_device is the name of the backing device.
 *   args is:
 *     create [lblk_shift=#]
 *     open
 *   compress_name is the name of the compress device.
 */
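/*
 * A minimal example (device name and size are illustrative only): for a
 * 1 GiB backing device that already carries a valid cbd header,
 *
 *   echo "0 2097152 compress /dev/sdb1" | dmsetup create cbd0
 *
 * creates a mapped device /dev/mapper/cbd0 backed by /dev/sdb1.
 */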
static int compress_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
    int err;
    unsigned int argn;
    struct dm_compress *dc = NULL;
    u64 dev_nr_pblks;

    printk(KERN_INFO "%s: enter: argc=%u\n", __func__, argc);
    for (argn = 0; argn < argc; ++argn) {
        printk(KERN_INFO "  ... arg[%u]=\"%s\"\n", argn, argv[argn]);
    }

    if (argc == 0) {
        ti->error = "No device specified";
        return -EINVAL;
    }

    argn = 1;
    while (argn < argc) {
        const char* arg = argv[argn++];
        const char* eq = strchr(arg, '=');
        if (!eq) {
            ti->error = "Invalid argument format";
            return -EINVAL;
        }
#if 0
        if (!memcmp(arg, "verbose", 7)) {
            err = kstrtouint(eq + 1, 0, &verbose_level);
            if (err) {
                ti->error = "Failed to parse verbose";
                return -EINVAL;
            }
            continue;
        }
#endif
        ti->error = "Unrecognized argument";
        return -EINVAL;
    }

    dc = kzalloc(sizeof(struct dm_compress), GFP_KERNEL);
    if (!dc) {
        ti->error = "Failed to allocate target";
        return -ENOMEM;
    }

    if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dc->dev)) {
        ti->error = "Device lookup failed";
        kfree(dc);
        return -EINVAL;
    }

    ti->private = dc;

    dev_nr_pblks = dm_target_pblk_size(ti);

    if (get_order(dev_nr_pblks) >= 48) {
        ti->error = "Device too large";
        dm_put_device(ti, dc->dev);
        kfree(dc);
        return -EINVAL;
    }

    ti->per_io_data_size = ALIGN(sizeof(struct dm_compress_io), ARCH_KMALLOC_MINALIGN);

    err = compress_open(dc, dev_nr_pblks);
    if (err) {
        dm_put_device(ti, dc->dev);
        kfree(dc);
        return err;
    }

    printk(KERN_INFO "%s: success\n", __func__);

    return 0;
}

static void compress_dtr(struct dm_target *ti)
{
    struct dm_compress *dc;
    unsigned int n;

    printk(KERN_INFO "%s: enter\n", __func__);

    dc = (struct dm_compress *)ti->private;
    if (dc->zcache) {
        for (n = 0; n < dc->nr_zc; ++n) {
            zone_cache_dtr(dc, &dc->zcache[n]);
        }
        kfree(dc->zcache);
    }
    if (dc->io_queue) {
        destroy_workqueue(dc->io_queue);
    }
    dm_put_device(ti, dc->dev);
    kfree(dc);
}

static int compress_map(struct dm_target *ti, struct bio *bio)
{
    struct dm_compress *dc = (struct dm_compress *)ti->private;
    struct dm_compress_io *io;

    if (dc->io_failed) {
        return DM_MAPIO_KILL;
    }

    /* from dm-crypt.c */
    if (unlikely(bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)) {
        bio_set_dev(bio, dc->dev->bdev);
        if (bio_sectors(bio)) {
            /* XXX: remap to underlying data */
        }
        return DM_MAPIO_REMAPPED;
    }

    /* Synchronous I/O operations deadlock, so queue them. */
    /* XXX: clone the bio? */
    io = dm_per_bio_data(bio, ti->per_io_data_size);
    io->dc = dc;
    io->bio = bio;

#ifdef USE_WORKQUEUE
    INIT_WORK(&io->work, compress_io_work);
    queue_work(dc->io_queue, &io->work);
#else
    compress_io(io);
#endif

    return DM_MAPIO_SUBMITTED;
}

static struct target_type compress_target = {
    .name    = "compress",
    .version = { 1, 0, 0 },
    .module  = THIS_MODULE,
    .ctr     = compress_ctr,
    .dtr     = compress_dtr,
    .map     = compress_map,
};

static int __init dm_compress_init(void)
{
    int res;

    res = dm_register_target(&compress_target);
    if (res < 0) {
        printk(KERN_ERR "Failed to register dm-compress: %d\n", res);
    }

    return res;
}

static void __exit dm_compress_exit(void)
{
    dm_unregister_target(&compress_target);
}

module_init(dm_compress_init);
module_exit(dm_compress_exit);

MODULE_DESCRIPTION("compress target for transparent compression");
MODULE_AUTHOR("Tom Marshall");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");