/*
 * Copyright (c) 2019 Tom Marshall
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

/*
 * NOTE: the original include names were lost; the kernel headers below
 * are those this file plainly depends on, and "dm-compress.h" is an
 * assumed name for the project-local header providing the cbd_, pblk_,
 * and lbd APIs.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/kthread.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/device-mapper.h>

#include "dm-compress.h"

/*
 * XXX
 * If we don't use a workqueue, pblk_read() stalls. Why?
 */
#define USE_WORKQUEUE 1

struct compress;

/* per bio private data */
struct compress_io {
    struct compress*    c;
    struct bio*         bio;
    struct work_struct  work;
};

struct compress {
    struct dm_dev*              dev;

    struct kobject              kobj;
    struct completion           kobj_unregister;

    struct compress_params      kparams;
    struct compress_stats       kstats;

    struct lbdcache*            lc;

    struct workqueue_struct*    io_workq;

    struct wait_queue_head      init_waitq;
    struct task_struct*         init_thread;
};

static struct kobject* compress_kobj;

static inline u64
blkdev_pages(struct block_device* bdev)
{
    return i_size_read(bdev->bd_inode) >> PAGE_SHIFT;
}

static inline u64
blkdev_pblks(struct block_device* bdev, struct cbd_params* params)
{
    return i_size_read(bdev->bd_inode) >> (SECTOR_SHIFT + params->pblk_shift);
}

static inline u64
target_pblks(struct dm_target* ti, struct cbd_params* params)
{
    return ti->len >> params->pblk_shift;
}

static inline u64
logical_pblks(struct cbd_params* params)
{
    return (params->lblk_per_zone << params->lblk_shift) * params->nr_zones;
}

/**************************************
 * Main functions
 **************************************/

static int
compress_read_header(struct compress* c, char** errorp)
{
    int ret = 0;
    struct page* page;
    u8* buf;
    struct cbd_header header;

    page = cbd_alloc_page();
    if (!page) {
        *errorp = "Out of memory";
        return -ENOMEM;
    }
    buf = page_address(page);

    ret = pblk_read_wait(&c->kparams, 0, 1, page);
    if (ret) {
        *errorp = "Header: failed to read";
        goto out;
    }

    memset(&header, 0, sizeof(header));
    cbd_header_get(buf, &header);

    if (memcmp(header.magic, CBD_MAGIC, sizeof(header.magic)) != 0) {
        *errorp = "Header: bad magic";
        ret = -EINVAL;
        goto out;
    }
    if (header.version_major != CBD_VERSION_MAJOR) {
        *errorp = "Header: unsupported major version";
        ret = -EINVAL;
        goto out;
    }
    if (header.version_minor != CBD_VERSION_MINOR) {
        *errorp = "Header: unsupported minor version";
        ret = -EINVAL;
        goto out;
    }
    if (cbd_compression_alg_get(&header.params) == CBD_ALG_NONE ||
        cbd_compression_alg_get(&header.params) >= CBD_ALG_MAX) {
        *errorp = "Header: unknown compression algorithm";
        ret = -EINVAL;
        goto out;
    }
#ifndef COMPRESS_HAVE_LZ4
    if (cbd_compression_alg_get(&header.params) == CBD_ALG_LZ4) {
        *errorp = "Header: compression algorithm lz4 is not built into kernel";
        ret = -EINVAL;
        goto out;
    }
#endif
#ifndef COMPRESS_HAVE_ZLIB
    if (cbd_compression_alg_get(&header.params) == CBD_ALG_ZLIB) {
        *errorp = "Header: compression algorithm zlib is not built into kernel";
        ret = -EINVAL;
        goto out;
    }
#endif
    if (cbd_compression_level_get(&header.params) < 1 ||
        cbd_compression_level_get(&header.params) > 9) {
        *errorp = "Header: compression level out of bounds";
        ret = -EINVAL;
        goto out;
    }
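    /*
     * Geometry checks: the values below come straight from the on-disk
     * header, so bound them before they feed the shift and multiply
     * arithmetic used for block addressing.
     */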
    if (header.params.pblk_shift < PBLK_SHIFT_MIN ||
        header.params.pblk_shift > PBLK_SHIFT_MAX) {
        *errorp = "Header: pblk_shift out of bounds";
        ret = -EINVAL;
        goto out;
    }
    if (header.params.lblk_shift < LBLK_SHIFT_MIN ||
        header.params.lblk_shift > LBLK_SHIFT_MAX) {
        *errorp = "Header: lblk_shift out of bounds";
        ret = -EINVAL;
        goto out;
    }
    if (header.params.lba_elem_pblk_bytes != 2 &&
        header.params.lba_elem_pblk_bytes != 4 &&
        header.params.lba_elem_pblk_bytes != 6) {
        *errorp = "Header: lba_elem_pblk_bytes out of bounds";
        ret = -EINVAL;
        goto out;
    }
    if (header.params.pbat_shift < PBAT_SHIFT_MIN ||
        header.params.pbat_shift > PBAT_SHIFT_MAX) {
        *errorp = "Header: pbat_shift out of bounds";
        ret = -EINVAL;
        goto out;
    }
    if (pbat_len(&header.params) * pblk_size(&header.params) > PAGE_SIZE) {
        *errorp = "Header: pbat size too large";
        ret = -EINVAL;
        goto out;
    }
    if (lba_len(&header.params) > pblk_size(&header.params)) {
        *errorp = "Header: lba elem size too large";
        ret = -EINVAL;
        goto out;
    }
    if ((zone_off(&header.params, header.params.nr_zones) >> 48) != 0) {
        *errorp = "Header: logical device too large";
        ret = -EINVAL;
        goto out;
    }

    printk(KERN_INFO "%s: parameters...\n", __func__);
    printk(KERN_INFO "  compression=0x%02x\n", (unsigned int)header.params.compression);
    printk(KERN_INFO "  pblk_shift=%hu\n", (unsigned short)header.params.pblk_shift);
    printk(KERN_INFO "  lblk_shift=%hu\n", (unsigned short)header.params.lblk_shift);
    printk(KERN_INFO "  lba_elem_pblk_bytes=%hu\n", (unsigned short)header.params.lba_elem_pblk_bytes);
    printk(KERN_INFO "  pbat_shift=%hu\n", (unsigned short)header.params.pbat_shift);
    printk(KERN_INFO "  nr_zones=%u\n", (unsigned int)header.params.nr_zones);
    printk(KERN_INFO "  lblk_per_zone=%u\n", (unsigned int)header.params.lblk_per_zone);
    printk(KERN_INFO "%s: stats...\n", __func__);
    printk(KERN_INFO "  pblk_used=%lu\n", (unsigned long)header.stats.pblk_used);
    printk(KERN_INFO "  lblk_used=%lu\n", (unsigned long)header.stats.lblk_used);

    memcpy(&c->kparams.params, &header.params, sizeof(header.params));
    memcpy(&c->kstats.stats, &header.stats, sizeof(header.stats));

out:
    cbd_free_page(page);

    return ret;
}

static int
compress_write_header(struct compress* c)
{
    int ret = 0;
    struct page* page;
    u8* buf;
    struct cbd_header header;

    page = cbd_alloc_page();
    if (!page) {
        return -ENOMEM;
    }
    buf = page_address(page);

    memset(&header, 0, sizeof(header));
    memcpy(header.magic, CBD_MAGIC, sizeof(header.magic));
    header.version_major = CBD_VERSION_MAJOR;
    header.version_minor = CBD_VERSION_MINOR;
    memcpy(&header.params, &c->kparams.params, sizeof(header.params));
    memcpy(&header.stats, &c->kstats.stats, sizeof(header.stats));
    cbd_header_put(buf, &header);

    ret = pblk_write_wait(&c->kparams, 0, 1, page);
    if (ret) {
        printk(KERN_ERR "%s: failed to write header\n", __func__);
    }

    cbd_free_page(page);

    return ret;
}

static int
compress_write_header_dirty(struct compress* c)
{
    int ret = 0;
    u16 save_flags;

    save_flags = c->kparams.params.flags;
    c->kparams.params.flags |= CBD_FLAG_DIRTY;
    ret = compress_write_header(c);
    c->kparams.params.flags = save_flags;

    return ret;
}
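/*
 * Zone initialization: zero-fill each zone's metadata blocks.  This is
 * done from a kernel thread (init_zone_thread) rather than in the
 * constructor, so mapping a large device returns quickly; writers that
 * touch a not-yet-initialized zone wait on init_waitq.
 */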
static void
init_zone(struct compress* c, u32 zone, struct page* page)
{
    u32 pblk_per_page = PAGE_SIZE / pblk_size(&c->kparams.params);
    u64 zone_pblk = zone_off(&c->kparams.params, zone);
    u32 nr_pblk = zone_metadata_len(&c->kparams.params);
    u32 pblk_idx;
    u64 pblkv[PBLK_IOV_MAX];
    u32 iov_len;
    u32 n;

    pblk_idx = 0;
    while (pblk_idx < nr_pblk) {
        iov_len = min(nr_pblk - pblk_idx, pblk_per_page);
        for (n = 0; n < iov_len; ++n) {
            pblkv[n] = zone_pblk + pblk_idx++;
        }
        pblk_writev(&c->kparams, pblkv, iov_len, page);
        lock_page(page);
    }
}

static int
init_zone_thread(void* arg)
{
    int ret = 0;
    struct compress* c = arg;
    struct page* page;
    u8* buf;
    unsigned long now;
    unsigned long next_write;

    printk(KERN_INFO "%s: initializing zones\n", __func__);
    page = cbd_alloc_page();
    if (!page) {
        printk(KERN_ERR "%s: Out of memory\n", __func__);
        return -ENOMEM;
    }
    buf = page_address(page);
    memset(buf, 0, PAGE_SIZE);
    lock_page(page);

    next_write = jiffies + 5*HZ;
    while (c->kparams.params.init_zones < c->kparams.params.nr_zones) {
        init_zone(c, c->kparams.params.init_zones, page);
        if (PageError(page)) {
            printk(KERN_ERR "%s: write failed\n", __func__);
            break;
        }
        ++c->kparams.params.init_zones;
        now = jiffies;
        if (time_after_eq(now, next_write)) {
            printk(KERN_INFO "%s: initialized %u/%u zones\n", __func__,
                   c->kparams.params.init_zones,
                   c->kparams.params.nr_zones);
            ret = compress_write_header_dirty(c);
            if (ret) {
                break;
            }
            wake_up_interruptible(&c->init_waitq);
            next_write = now + 5*HZ;
        }
        if (kthread_should_stop()) {
            break;
        }
    }
    cbd_free_page(page);
    compress_write_header_dirty(c);
    c->init_thread = NULL;
    wake_up_interruptible(&c->init_waitq);
    printk(KERN_INFO "%s: exit\n", __func__);

    return ret;
}

static int
compress_read(struct compress *c, struct bio *bio)
{
    struct lbd* lbd = NULL;
    struct bio_vec bv;
    struct bvec_iter iter;
    u32 sectors_per_lblk = lblk_size(&c->kparams.params) >> SECTOR_SHIFT;

    bio_for_each_segment(bv, bio, iter) {
        u64 lblk = iter.bi_sector / sectors_per_lblk;
        u32 lblk_off = (iter.bi_sector - lblk * sectors_per_lblk) * SECTOR_SIZE;
        u32 lblk_zone = zone_for_lblk(&c->kparams.params, lblk);
        unsigned long flags;
        char* data;

        if (c->kparams.params.init_zones >= lblk_zone) {
            lbd = lbdcache_get(c->lc, lblk);
            if (!lbd) {
                return -EIO;
            }
            data = bvec_kmap_irq(&bv, &flags);
            lbd_data_read(lbd, lblk_off, bv.bv_len, data);
            bvec_kunmap_irq(data, &flags);
            lbdcache_put(c->lc, lbd);
        }
        else {
            /* Zone not yet initialized: reads return zeroes. */
            data = bvec_kmap_irq(&bv, &flags);
            memset(data, 0, bv.bv_len);
            bvec_kunmap_irq(data, &flags);
        }
    }

    return 0;
}

static int
compress_write(struct compress *c, struct bio *bio)
{
    struct lbd* lbd = NULL;
    struct bio_vec bv;
    struct bvec_iter iter;
    u32 sectors_per_lblk = lblk_size(&c->kparams.params) >> SECTOR_SHIFT;

    bio_for_each_segment(bv, bio, iter) {
        u64 lblk = iter.bi_sector / sectors_per_lblk;
        u32 lblk_off = (iter.bi_sector - lblk * sectors_per_lblk) * SECTOR_SIZE;
        u32 lblk_zone = zone_for_lblk(&c->kparams.params, lblk);
        int ret;
        unsigned long flags;
        char* data;

        /* Block until the zone init thread has covered this zone. */
        if (c->kparams.params.init_zones < lblk_zone && c->init_thread != NULL) {
            ret = wait_event_interruptible(c->init_waitq,
                    c->kparams.params.init_zones >= lblk_zone ||
                    c->init_thread == NULL);
            if (ret) {
                return ret;
            }
        }
        if (c->kparams.params.init_zones < lblk_zone) {
            return -EIO;
        }

        lbd = lbdcache_get(c->lc, lblk);
        if (!lbd) {
            return -EIO;
        }
        data = bvec_kmap_irq(&bv, &flags);
        lbd_data_write(lbd, lblk_off, bv.bv_len, data);
        bvec_kunmap_irq(data, &flags);
        lbdcache_put(c->lc, lbd);
    }

    return 0;
}
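/*
 * Per-bio entry point.  Reads and writes are dispatched from here; with
 * USE_WORKQUEUE set, compress_io() runs on io_workq rather than in the
 * caller's context (see the XXX near the top of the file: pblk_read()
 * stalls otherwise).
 */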
static void
compress_io(struct compress_io* cio)
{
    int ret;
    struct compress* c = cio->c;
    struct bio* bio = cio->bio;

    switch (bio_op(bio)) {
    case REQ_OP_READ:
        ret = compress_read(c, bio);
        break;
    case REQ_OP_WRITE:
        ret = compress_write(c, bio);
        break;
    default:
        printk(KERN_ERR "%s: unknown op in bio: %u\n", __func__, bio_op(bio));
        ret = -EINVAL;
    }
    if (ret) {
        printk(KERN_ERR "%s: failed, ret=%d\n", __func__, ret);
    }

    bio->bi_status = (ret == 0 ? BLK_STS_OK : BLK_STS_IOERR); /* XXX */
    bio_endio(bio);
}

#ifdef USE_WORKQUEUE
static void
compress_io_work(struct work_struct* work)
{
    struct compress_io* cio = container_of(work, struct compress_io, work);

    compress_io(cio);
}
#endif

/*** sysfs stuff ***/

typedef enum {
    attr_zone_init,
    attr_zone_total,
    attr_lblk_size,
    attr_pblk_used,
    attr_pblk_total,
    attr_lblk_used,
    attr_lblk_total,
    attr_pbat_r,
    attr_pbat_w,
    attr_lbatpblk_r,
    attr_lbatpblk_w,
    attr_lbd_r,
    attr_lbd_w,
} attr_id_t;

struct compress_attr {
    struct attribute attr;
    short attr_id;
};

static ssize_t
compress_attr_show(struct kobject* kobj, struct attribute* attr, char* buf)
{
    struct compress* c = container_of(kobj, struct compress, kobj);
    struct compress_attr* a = container_of(attr, struct compress_attr, attr);
    u64 val = 0;

    mutex_lock(&c->kstats.lock);
    switch (a->attr_id) {
    case attr_zone_init:
        val = c->kparams.params.init_zones;
        break;
    case attr_zone_total:
        val = c->kparams.params.nr_zones;
        break;
    case attr_lblk_size:
        val = lblk_size(&c->kparams.params);
        break;
    case attr_pblk_used:
        val = c->kstats.stats.pblk_used;
        break;
    case attr_pblk_total:
        val = pbat_len(&c->kparams.params) *
              pblk_size_bits(&c->kparams.params) *
              c->kparams.params.nr_zones;
        break;
    case attr_lblk_used:
        val = c->kstats.stats.lblk_used;
        break;
    case attr_lblk_total:
        val = c->kparams.params.lblk_per_zone * c->kparams.params.nr_zones;
        break;
    case attr_pbat_r:
        val = c->kstats.pbat_r;
        break;
    case attr_pbat_w:
        val = c->kstats.pbat_w;
        break;
    case attr_lbatpblk_r:
        val = c->kstats.lbatpblk_r;
        break;
    case attr_lbatpblk_w:
        val = c->kstats.lbatpblk_w;
        break;
    case attr_lbd_r:
        val = c->kstats.lbd_r;
        break;
    case attr_lbd_w:
        val = c->kstats.lbd_w;
        break;
    }
    mutex_unlock(&c->kstats.lock);

    return snprintf(buf, PAGE_SIZE, "%lu\n", (unsigned long)val);
}

#define COMPRESS_ATTR(_name,_mode,_id)                      \
static struct compress_attr compress_attr_##_name = {       \
    .attr = { .name = __stringify(_name), .mode = _mode },  \
    .attr_id = attr_##_id,                                  \
}
#define COMPRESS_ATTR_FUNC(_name,_mode) COMPRESS_ATTR(_name, _mode, _name)

COMPRESS_ATTR_FUNC(zone_init, 0444);
COMPRESS_ATTR_FUNC(zone_total, 0444);
COMPRESS_ATTR_FUNC(lblk_size, 0444);
COMPRESS_ATTR_FUNC(pblk_used, 0444);
COMPRESS_ATTR_FUNC(pblk_total, 0444);
COMPRESS_ATTR_FUNC(lblk_used, 0444);
COMPRESS_ATTR_FUNC(lblk_total, 0444);
COMPRESS_ATTR_FUNC(pbat_r, 0444);
COMPRESS_ATTR_FUNC(pbat_w, 0444);
COMPRESS_ATTR_FUNC(lbatpblk_r, 0444);
COMPRESS_ATTR_FUNC(lbatpblk_w, 0444);
COMPRESS_ATTR_FUNC(lbd_r, 0444);
COMPRESS_ATTR_FUNC(lbd_w, 0444);

#define ATTR_LIST(name) &compress_attr_##name.attr
static struct attribute* compress_attrs[] = {
    ATTR_LIST(zone_init),
    ATTR_LIST(zone_total),
    ATTR_LIST(lblk_size),
    ATTR_LIST(pblk_used),
    ATTR_LIST(pblk_total),
    ATTR_LIST(lblk_used),
    ATTR_LIST(lblk_total),
    ATTR_LIST(pbat_r),
    ATTR_LIST(pbat_w),
    ATTR_LIST(lbatpblk_r),
    ATTR_LIST(lbatpblk_w),
    ATTR_LIST(lbd_r),
    ATTR_LIST(lbd_w),
    NULL
};
#undef ATTR_LIST

static void
compress_sysfs_release(struct kobject* kobj)
{
    struct compress* c = container_of(kobj, struct compress, kobj);

    complete(&c->kobj_unregister);
}

static const struct sysfs_ops compress_attr_ops = {
    .show = compress_attr_show,
};

static struct kobj_type compress_ktype = {
    .default_attrs = compress_attrs,
    .sysfs_ops     = &compress_attr_ops,
    .release       = compress_sysfs_release,
};
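/*
 * With compress_kobj parented on fs_kobj (see dm_compress_init below),
 * the attributes above surface under /sys/fs/compress/<bdev>/, where
 * <bdev> is the "%pg" name of the backing device.  For example (device
 * name assumed for illustration):
 *
 *   cat /sys/fs/compress/sdb/pblk_used
 */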
static int
compress_register_sysfs(struct compress* c)
{
    int err;
    char name[32];

    snprintf(name, sizeof(name), "%pg", c->dev->bdev);
    init_completion(&c->kobj_unregister);
    err = kobject_init_and_add(&c->kobj, &compress_ktype, compress_kobj,
                               "%s", name);
    if (err) {
        kobject_put(&c->kobj);
        wait_for_completion(&c->kobj_unregister);
    }

    return err;
}

static void
compress_unregister_sysfs(struct compress* c)
{
    kobject_del(&c->kobj);
}

static void
__compress_dtr(struct compress* c)
{
    printk(KERN_INFO "%s: enter\n", __func__);

    if (c->init_thread) {
        kthread_stop(c->init_thread);
        c->init_thread = NULL;
    }
    if (c->lc) {
        lbdcache_dtr(c->lc);
        kfree(c->lc);
    }
    if (c->io_workq) {
        destroy_workqueue(c->io_workq);
    }
    compress_unregister_sysfs(c);
}

/*
 * Usage:
 *   echo "<start_sector> <end_sector> compress <backing_device> <args...>" | \
 *       dmsetup create <compress_name>
 * Where:
 *   start_sector is the starting sector of the backing device.
 *   end_sector is the ending sector of the backing device.
 *   compress is the name of this module.
 *   backing_device is the name of the backing device.
 *   args may include:
 *     cache_pages=#
 *     sync
 *   compress_name is the name of the compress device.
 */
static int
compress_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
    int ret;
    unsigned int argn;
    u32 cache_pages = 0;
    bool sync = false;
    struct compress *c = NULL;

    printk(KERN_INFO "%s: enter: argc=%u\n", __func__, argc);
    for (argn = 0; argn < argc; ++argn) {
        printk(KERN_INFO " ... arg[%u]=\"%s\"\n", argn, argv[argn]);
    }

    if (argc == 0) {
        ti->error = "No device specified";
        return -EINVAL;
    }

    argn = 1;
    while (argn < argc) {
        char* arg = argv[argn++];
        char* val = NULL;
        char* eq = strchr(arg, '=');
        int err;

        if (eq) {
            *eq = '\0';
            val = eq + 1;
        }
        /* XXX: Parse suffixes */
        if (!strcmp(arg, "cache_pages")) {
            if (!val) {
                ti->error = "Missing value for cache_pages";
                return -EINVAL;
            }
            err = kstrtouint(val, 0, &cache_pages);
            if (err) {
                ti->error = "Failed to parse cache_pages";
                return -EINVAL;
            }
            continue;
        }
        if (!strcmp(arg, "sync")) {
            sync = true;
            continue;
        }
        ti->error = "Unrecognized argument";
        return -EINVAL;
    }

    c = kzalloc(sizeof(struct compress), GFP_KERNEL);
    if (!c) {
        ti->error = "Out of memory";
        return -ENOMEM;
    }
    if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev)) {
        ti->error = "Device lookup failed";
        kfree(c);
        return -EINVAL;
    }
    ti->private = c;
    ti->per_io_data_size = ALIGN(sizeof(struct compress_io),
                                 ARCH_KMALLOC_MINALIGN);

    ret = compress_register_sysfs(c);
    if (ret) {
        ti->error = "Failed to register sysfs";
        goto err;
    }

    c->kparams.dev = c->dev->bdev;
    mutex_init(&c->kstats.lock);

    ret = compress_read_header(c, &ti->error);
    if (ret) {
        /* ti->error already set */
        goto err;
    }

    if (blkdev_pblks(c->dev->bdev, &c->kparams.params) <
        zone_off(&c->kparams.params, c->kparams.params.nr_zones)) {
        printk(KERN_ERR "%s: physical device too small: "
               "actual=%lu, needed=%lu\n", __func__,
               (unsigned long)blkdev_pblks(c->dev->bdev, &c->kparams.params),
               (unsigned long)zone_off(&c->kparams.params,
                                       c->kparams.params.nr_zones));
        ti->error = "Physical device too small";
        ret = -EINVAL;
        goto err;
    }
    if (target_pblks(ti, &c->kparams.params) !=
        logical_pblks(&c->kparams.params)) {
        printk(KERN_WARNING "%s: incorrect target device size: "
               "expected pblks=%lu, actual pblks=%lu\n", __func__,
               (unsigned long)logical_pblks(&c->kparams.params),
               (unsigned long)target_pblks(ti, &c->kparams.params));
    }

    if (!cache_pages) {
        /* Minimum of 1/1k RAM and 1/64k device size */
        cache_pages = min((unsigned int)(totalram_pages >> 10),
                          (unsigned int)(blkdev_pages(c->dev->bdev) >> 16));
        if (cache_pages < 32 * 2 * num_online_cpus()) {
            cache_pages = 32 * 2 * num_online_cpus();
        }
    }
    printk(KERN_INFO "%s: totalram_pages=%lu blkdev_pages=%lu cache_pages=%u\n",
           __func__, totalram_pages,
           (unsigned long)blkdev_pages(c->dev->bdev), cache_pages);

    if (c->kparams.params.flags & CBD_FLAG_DIRTY) {
        printk(KERN_INFO "Warning: device was not properly closed\n");
    }
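    /*
     * If the table is writable, set the dirty flag in the on-disk header
     * now; compress_dtr() rewrites a clean header on shutdown, so the
     * flag being set at ctr time indicates an unclean close.
     */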
    if (dm_table_get_mode(ti->table) & FMODE_WRITE) {
        ret = compress_write_header_dirty(c);
        if (ret) {
            ti->error = "Failed to write header";
            goto err;
        }
    }

    /* XXX: validate lblk_per_zone */
    c->lc = kmalloc(lbdcache_size(), GFP_KERNEL);
    if (!c->lc) {
        ti->error = "Out of memory";
        ret = -ENOMEM;
        goto err;
    }
    if (!lbdcache_ctr(c->lc, &c->kparams, &c->kstats, cache_pages, sync)) {
        ti->error = "Failed to init logical block cache";
        ret = -ENOMEM;
        goto err;
    }

    c->io_workq = alloc_workqueue("compress_io",
                                  WQ_HIGHPRI | WQ_MEM_RECLAIM, 1);
    if (!c->io_workq) {
        ti->error = "Failed to alloc io_workq";
        ret = -ENOMEM;
        goto err;
    }

    init_waitqueue_head(&c->init_waitq);
    if (c->kparams.params.init_zones < c->kparams.params.nr_zones) {
        c->init_thread = kthread_run(init_zone_thread, c,
                                     "compress_zone_init");
        if (IS_ERR(c->init_thread)) {
            ti->error = "Failed to start zone init thread";
            ret = PTR_ERR(c->init_thread);
            c->init_thread = NULL;
            goto err;
        }
    }

    printk(KERN_INFO "%s: success\n", __func__);

    return 0;

err:
    __compress_dtr(c);
    dm_put_device(ti, c->dev);
    kfree(c);

    return ret;
}

static void
compress_dtr(struct dm_target *ti)
{
    int ret;
    struct compress* c = ti->private;

    printk(KERN_INFO "%s: enter\n", __func__);

    __compress_dtr(c);
    if (dm_table_get_mode(ti->table) & FMODE_WRITE) {
        ret = compress_write_header(c);
        if (ret) {
            printk(KERN_INFO "Warning: failed to write header\n");
        }
    }
    dm_put_device(ti, c->dev);
    kfree(c);
}

static int
compress_map(struct dm_target *ti, struct bio *bio)
{
    struct compress *c = ti->private;
    struct compress_io *cio;

    if (c->kparams.params.flags & CBD_FLAG_ERROR) {
        bio->bi_status = BLK_STS_IOERR;
        bio_endio(bio);
        return DM_MAPIO_SUBMITTED; /* XXXX: DM_MAPIO_KILL? */
    }

    /* from dm-crypt.c */
    if (unlikely(bio->bi_opf & REQ_PREFLUSH ||
                 bio_op(bio) == REQ_OP_DISCARD)) {
        bio_set_dev(bio, c->dev->bdev);
        if (bio_sectors(bio)) {
            /* XXX: remap to underlying data */
        }
        return DM_MAPIO_REMAPPED;
    }

    /* Synchronous I/O operations deadlock, so queue them. */
    /* XXX: clone the bio? */
    cio = dm_per_bio_data(bio, ti->per_io_data_size);
    cio->c = c;
    cio->bio = bio;

#ifdef USE_WORKQUEUE
    INIT_WORK(&cio->work, compress_io_work);
    queue_work(c->io_workq, &cio->work);
#else
    compress_io(cio);
#endif

    return DM_MAPIO_SUBMITTED;
}

static struct target_type compress_target = {
    .name    = "compress",
    .version = { 1, 0, 0 },
    .module  = THIS_MODULE,
    .ctr     = compress_ctr,
    .dtr     = compress_dtr,
    .map     = compress_map,
};

static int __init
dm_compress_init(void)
{
    int res;

    compress_kobj = kobject_create_and_add("compress", fs_kobj);
    if (!compress_kobj) {
        printk(KERN_ERR "Failed to add sysfs kobj\n");
        return -ENOMEM;
    }

    res = dm_register_target(&compress_target);
    if (res < 0) {
        printk(KERN_ERR "Failed to register dm-compress: %d\n", res);
        kobject_put(compress_kobj);
        compress_kobj = NULL;
    }

    return res;
}

static void __exit
dm_compress_exit(void)
{
    dm_unregister_target(&compress_target);
    if (compress_kobj) {
        kobject_put(compress_kobj);
        compress_kobj = NULL;
    }
}

module_init(dm_compress_init);
module_exit(dm_compress_exit);

MODULE_DESCRIPTION("compress target for transparent compression");
MODULE_AUTHOR("Tom Marshall ");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");
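/*
 * Example of creating a mapping (a sketch, not from the original source:
 * the device name, sector count, and cache_pages value are illustrative,
 * and the sector count must match the device's formatted logical size):
 *
 *   echo "0 2097152 compress /dev/sdb cache_pages=1024" | \
 *       dmsetup create cbd0
 */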