cbd/dm-compress/compress.c

/*
* Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>
// XXX: find a better name for this, something about storage vs. speed.
// XXX: should this be in cbd_params?
// #define CBD_DETECT_ZERO_BLOCKS
/*
* XXX
* If we don't use a workqueue, pblk_read() stalls. Why?
*/
#define USE_WORKQUEUE 1
struct compress;
/* per bio private data */
struct compress_io {
struct compress* c;
struct bio* bio;
struct work_struct work;
};
struct compress
{
struct dm_dev* dev;
struct cbd_params params;
struct lbdcache* lc;
struct workqueue_struct* io_workq;
};
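/* Size of the backing block device, in physical (PBLK_SIZE) blocks. */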
static inline u64
blkdev_pblk_size(struct block_device *bdev)
{
return i_size_read(bdev->bd_inode) >> PBLK_SHIFT;
}
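/* Length of the dm target, converted from 512-byte sectors to physical blocks. */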
static inline u64
dm_target_pblk_size(struct dm_target* ti)
{
return ti->len >> (PBLK_SHIFT - SECTOR_SHIFT);
}
/**************************************
* Main functions
**************************************/
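/*
* Read pblk 0 of the backing device, unpack and validate the on-disk
* header, and cache its parameters in c->params.
*/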
static int
compress_read_header(struct compress* c)
{
int ret = 0;
struct page* pblkpage;
u8 *pblkbuf;
struct page* iopagev[1];
struct cbd_header header;
pblkpage = cbd_alloc_page();
if (!pblkpage) {
return -ENOMEM;
}
pblkbuf = page_address(pblkpage);
iopagev[0] = pblkpage;
ret = pblk_read_wait(c->dev->bdev, 0, 1, iopagev);
if (ret) {
printk(KERN_ERR "%s: failed to read header\n", __func__);
goto out;
}
memset(&header, 0, sizeof(header));
cbd_header_get(pblkbuf, &header);
header.params.priv = c->dev->bdev;
if (memcmp(header.magic, CBD_MAGIC, sizeof(header.magic)) != 0) {
printk(KERN_ERR "%s: bad magic\n", __func__);
ret = -EINVAL;
goto out;
}
if (header.version_major != CBD_VERSION_MAJOR) {
printk(KERN_ERR "%s: bad version\n", __func__);
ret = -EINVAL;
goto out;
}
if (header.version_minor != CBD_VERSION_MINOR) {
printk(KERN_ERR "%s: bad version\n", __func__);
ret = -EINVAL;
goto out;
}
if (header.params.algorithm == CBD_ALG_NONE ||
header.params.algorithm >= CBD_ALG_MAX) {
printk(KERN_ERR "%s: bad algorithm\n", __func__);
ret = -EINVAL;
goto out;
}
#ifndef COMPRESS_HAVE_LZ4
if (header.params.algorithm == CBD_ALG_LZ4) {
printk(KERN_ERR "%s: algorithm lz4 is not built into kernel\n", __func__);
ret = -EINVAL;
goto out;
}
#endif
#ifndef COMPRESS_HAVE_ZLIB
if (header.params.algorithm == CBD_ALG_ZLIB) {
printk(KERN_ERR "%s: algorithm zlib is not built into kernel\n", __func__);
ret = -EINVAL;
goto out;
}
#endif
if (header.params.compression < 1 || header.params.compression > 9) {
printk(KERN_ERR "%s: bad compression\n", __func__);
ret = -EINVAL;
goto out;
}
if (header.params.lblk_shift < LBLK_SHIFT_MIN ||
header.params.lblk_shift > LBLK_SHIFT_MAX) {
printk(KERN_ERR "%s: bad lblk_shift\n", __func__);
ret = -EINVAL;
goto out;
}
printk(KERN_INFO "%s: parameters...\n", __func__);
printk(KERN_INFO " algorithm=%hu\n", (unsigned short)header.params.algorithm);
printk(KERN_INFO " compression=%hu\n", (unsigned short)header.params.compression);
printk(KERN_INFO " pbat_len=%hu\n", (unsigned short)header.params.pbat_len);
printk(KERN_INFO " lblk_shift=%hu\n", (unsigned short)header.params.lblk_shift);
printk(KERN_INFO " nr_pblk=%lu\n", (unsigned long)header.params.nr_pblk);
printk(KERN_INFO " nr_zones=%u\n", (unsigned int)header.params.nr_zones);
printk(KERN_INFO " lblk_per_zone=%u\n", (unsigned int)header.params.lblk_per_zone);
memcpy(&c->params, &header.params, sizeof(header.params));
out:
cbd_free_page(pblkpage);
return ret;
}
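/* Pack c->params into an on-disk header and write it to pblk 0. */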
static int
compress_write_header(struct compress* c)
{
int ret = 0;
struct page* pblkpage;
u8* pblkbuf;
struct cbd_header header;
struct page* iopagev[1];
pblkpage = cbd_alloc_page();
if (!pblkpage) {
return -ENOMEM;
}
pblkbuf = page_address(pblkpage);
memset(&header, 0, sizeof(header));
memcpy(header.magic, CBD_MAGIC, sizeof(header.magic));
header.version_major = CBD_VERSION_MAJOR;
header.version_minor = CBD_VERSION_MINOR;
memcpy(&header.params, &c->params, sizeof(header.params));
cbd_header_put(pblkbuf, &header);
iopagev[0] = pblkpage;
ret = pblk_write_wait(c->params.priv, 0, 1, iopagev);
if (ret) {
printk(KERN_ERR "%s: failed to write header\n", __func__);
}
cbd_free_page(pblkpage);
return ret;
}
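/*
* Swap the cached logical block: acquire the lbd for lblk before
* releasing oldlbd so the cache does not flush data it is about to
* hand back (see comment below).
*/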
static struct lbd*
compress_lbdcache_swap(struct compress* c, u64 lblk, struct lbd* oldlbd)
{
struct lbd* lbd;
/* Get new data before putting old data to avoid flush */
lbd = lbdcache_get(c->lc, lblk);
if (!lbd) {
printk(KERN_ERR "%s: lbdcache_get failed\n", __func__);
lbdcache_put(c->lc, oldlbd);
return NULL;
}
if (lbdcache_put(c->lc, oldlbd) != 0) {
printk(KERN_ERR "%s: failed to put oldlbd\n", __func__);
lbdcache_put(c->lc, lbd);
return NULL;
}
return lbd;
}
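/*
* Service a read bio: for each segment, look up the logical block
* covering that sector range and copy its data into the segment.
*/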
static int
compress_read(struct compress *c, struct bio *bio)
{
struct lbd* lbd = NULL;
struct bio_vec bv;
struct bvec_iter iter;
int ret;
u32 lblk_per_sector = lblk_per_pblk(&c->params) * PBLK_PER_SECTOR;
u64 last_lblk = LBLK_NONE;
bio_for_each_segment(bv, bio, iter) {
u64 lblk = iter.bi_sector / lblk_per_sector;
u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
unsigned long flags;
char* data;
if (lblk != last_lblk) {
lbd = compress_lbdcache_swap(c, lblk, lbd);
if (!lbd) {
return -EIO;
}
last_lblk = lblk;
}
data = bvec_kmap_irq(&bv, &flags);
lbd_data_read(lbd, lblk_off, bv.bv_len, data);
bvec_kunmap_irq(data, &flags);
}
ret = lbdcache_put(c->lc, lbd);
return ret;
}
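/*
* Service a write bio: copy each segment into the corresponding
* logical block; the lbd cache is responsible for compressing the
* block and writing it back to the backing device.
*/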
static int
compress_write(struct compress *c, struct bio *bio)
{
struct lbd* lbd = NULL;
struct bio_vec bv;
struct bvec_iter iter;
int ret;
u32 lblk_per_sector = lblk_per_pblk(&c->params) * PBLK_PER_SECTOR;
u64 last_lblk = LBLK_NONE;
bio_for_each_segment(bv, bio, iter) {
u64 lblk = iter.bi_sector / lblk_per_sector;
u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
unsigned long flags;
char* data;
if (lblk != last_lblk) {
lbd = compress_lbdcache_swap(c, lblk, lbd);
if (!lbd) {
return -EIO;
}
last_lblk = lblk;
}
data = bvec_kmap_irq(&bv, &flags);
lbd_data_write(lbd, lblk_off, bv.bv_len, data);
bvec_kunmap_irq(data, &flags);
}
ret = lbdcache_put(c->lc, lbd);
return ret;
}
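/* Process one queued bio: dispatch on the operation and complete the bio. */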
static void
compress_io(struct compress_io* cio)
{
int ret;
struct compress* c = cio->c;
struct bio* bio = cio->bio;
switch (bio_op(bio)) {
case REQ_OP_READ:
ret = compress_read(c, bio);
break;
case REQ_OP_WRITE:
ret = compress_write(c, bio);
break;
default:
printk(KERN_ERR "%s: unknown op in bio: %u\n", __func__, bio_op(bio));
ret = -EINVAL;
}
if (ret) {
printk(KERN_ERR "%s: failed, ret=%d\n", __func__, ret);
}
bio->bi_status = (ret == 0 ? BLK_STS_OK : BLK_STS_IOERR); /* XXX */
bio_endio(bio);
}
#ifdef USE_WORKQUEUE
static void
compress_io_work(struct work_struct* work)
{
struct compress_io* cio = container_of(work, struct compress_io, work);
compress_io(cio);
}
#endif
/*
* Usage:
* echo "<start_sector> <num_sectors> compress <backing_device> <args...>" | dmsetup create <compress_name>
* Where:
* start_sector is the starting sector of the mapping (typically 0).
* num_sectors is the length of the mapping in 512-byte sectors.
* compress is the name of this target module.
* backing_device is the name of the backing device.
* args is:
* create [lblk_shift=#]
* open
* compress_name is the name of the compress device.
*/
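/*
* Illustrative example (the device path and sector count are hypothetical):
*   echo "0 2097152 compress /dev/sdb1" | dmsetup create cbd0
* creates a 1 GiB /dev/mapper/cbd0 backed by /dev/sdb1.
*/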
static int
compress_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
int ret;
unsigned int argn;
struct compress *c = NULL;
u64 dev_nr_pblks;
printk(KERN_INFO "%s: enter: argc=%u\n", __func__, argc);
for (argn = 0; argn < argc; ++argn) {
printk(KERN_INFO " ... arg[%u]=\"%s\"\n", argn, argv[argn]);
}
if (argc == 0) {
ti->error = "No device specified";
return -EINVAL;
}
argn = 1;
while (argn < argc) {
const char* arg = argv[argn++];
const char* val = NULL;
const char* eq = strchr(arg, '=');
if (eq) {
val = eq + 1;
}
#if 0
if (!memcmp(arg, "verbose", 7)) {
err = kstrtouint(eq + 1, 0, &verbose_level);
if (err) {
ti->error = "Failed to parse verbose";
return -EINVAL;
}
continue;
}
#endif
ti->error = "Unrecognized argument";
return -EINVAL;
}
c = kzalloc(sizeof(struct compress), GFP_KERNEL);
if (!c) {
ti->error = "Failed to allocate target";
return -ENOMEM;
}
if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev)) {
ti->error = "Device lookup failed";
kfree(c);
return -EINVAL;
}
ti->private = c;
dev_nr_pblks = dm_target_pblk_size(ti);
if (get_order(dev_nr_pblks) >= 48) {
ti->error = "Device too large";
ret = -EINVAL;
goto err;
}
ti->per_io_data_size = ALIGN(sizeof(struct compress_io), ARCH_KMALLOC_MINALIGN);
ret = compress_read_header(c);
if (ret) {
goto err;
}
if (c->params.flags & CBD_FLAG_DIRTY) {
printk(KERN_INFO "Warning: device was not properly closed\n");
}
if (dm_table_get_mode(ti->table) & FMODE_WRITE) {
u16 save_flags = c->params.flags;
c->params.flags |= CBD_FLAG_DIRTY;
ret = compress_write_header(c);
c->params.flags = save_flags;
if (ret) {
goto err;
}
}
/* XXX: validate minimum pblk using zone_off(max_zone+1) */
if (c->params.nr_pblk > dev_nr_pblks) {
printk(KERN_ERR "%s: bad nr_pblk\n", __func__);
ret = -EINVAL;
goto err;
}
if (c->params.nr_zones > zone_for_pblk(&c->params, dev_nr_pblks)) {
printk(KERN_ERR "%s: bad nr_zones\n", __func__);
ret = -EINVAL;
goto err;
}
/* XXX: validate lblk_per_zone */
c->lc = kmalloc(lbdcache_size(), GFP_KERNEL);
if (!c->lc) {
printk(KERN_ERR "Failed to alloc lbdcache\n");
ret = -ENOMEM;
goto err;
}
if (!lbdcache_ctr(c->lc, &c->params)) {
printk(KERN_ERR "Failed to init logical block cache\n");
kfree(c->lc);
c->lc = NULL;
ret = -ENOMEM;
goto err;
}
c->io_workq = alloc_workqueue("compress_io", WQ_HIGHPRI | WQ_MEM_RECLAIM, 1);
if (!c->io_workq) {
printk(KERN_ERR "%s: failed to alloc io_workq\n", __func__);
ret = -ENOMEM;
goto err;
}
printk(KERN_INFO "%s: success\n", __func__);
return 0;
err:
if (c->lc) {
lbdcache_dtr(c->lc);
kfree(c->lc);
}
dm_put_device(ti, c->dev);
kfree(c);
return ret;
}
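/*
* Tear down the target: write the header back (if writable), destroy
* the lbd cache and workqueue, and release the backing device.
*/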
static void
compress_dtr(struct dm_target *ti)
{
int ret;
struct compress *c;
printk(KERN_INFO "%s: enter\n", __func__);
c = ti->private;
if (dm_table_get_mode(ti->table) & FMODE_WRITE) {
ret = compress_write_header(c);
if (ret) {
printk(KERN_INFO "Warning: failed to write header\n");
}
}
lbdcache_dtr(c->lc);
kfree(c->lc);
if (c->io_workq) {
destroy_workqueue(c->io_workq);
}
dm_put_device(ti, c->dev);
kfree(c);
}
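/*
* dm map callback: pass flushes and discards straight through to the
* backing device; queue all other bios to the worker, since the
* compress I/O path performs synchronous I/O and would deadlock here.
*/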
static int
compress_map(struct dm_target *ti, struct bio *bio)
{
struct compress *c = ti->private;
struct compress_io *cio;
/* from dm-crypt.c */
if (unlikely(bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)) {
bio_set_dev(bio, c->dev->bdev);
if (bio_sectors(bio)) {
/* XXX: remap to underlying data */
}
return DM_MAPIO_REMAPPED;
}
/* Synchronous I/O operations deadlock, so queue them. */
/* XXX: clone the bio? */
cio = dm_per_bio_data(bio, ti->per_io_data_size);
cio->c = c;
cio->bio = bio;
#ifdef USE_WORKQUEUE
INIT_WORK(&cio->work, compress_io_work);
queue_work(c->io_workq, &cio->work);
#else
compress_io(cio);
#endif
return DM_MAPIO_SUBMITTED;
}
static struct target_type compress_target = {
.name = "compress",
.version = { 1, 0, 0 },
.module = THIS_MODULE,
.ctr = compress_ctr,
.dtr = compress_dtr,
.map = compress_map,
};
static int __init
dm_compress_init(void)
{
int res;
res = dm_register_target(&compress_target);
if (res < 0) {
printk(KERN_ERR "Failed to register dm-compress: %d\n", res);
}
return res;
}
static void __exit
dm_compress_exit(void)
{
dm_unregister_target(&compress_target);
}
module_init(dm_compress_init);
module_exit(dm_compress_exit);
MODULE_DESCRIPTION("compress target for transparent compression");
MODULE_AUTHOR("Tom Marshall <tdm.code@gmail.com>");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");