/*
 * Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>

#include <linux/lz4.h>

#include <linux/dm-compress.h>

// XXX: find a better name for this, something about storage vs. speed.
// XXX: should this be in cbd_params?
// #define CBD_DETECT_ZERO_BLOCKS

/*
 * XXX
 * If we don't use a workqueue, pblk_read() stalls.  Why?
 */
#define USE_WORKQUEUE 1

struct compress;

/* per bio private data */
struct compress_io {
	struct compress* c;
	struct bio* bio;
	struct work_struct work;
};

struct compress
{
	struct dm_dev* dev;

	struct cbd_params params;
	struct cbd_stats stats;
	struct lbdcache* lc;

	struct workqueue_struct* io_workq;
};

/* Size of a block device in physical blocks (pblks). */
static inline u64
blkdev_pblk_size(struct block_device *bdev)
{
	return i_size_read(bdev->bd_inode) >> PBLK_SHIFT;
}

/* Size of a device-mapper target in physical blocks (pblks). */
static inline u64
dm_target_pblk_size(struct dm_target* ti)
{
	return ti->len >> (PBLK_SHIFT - SECTOR_SHIFT);
}
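
/*
 * Worked example (illustrative, assuming 512-byte sectors and 4 KiB
 * pblks, i.e. SECTOR_SHIFT == 9 and PBLK_SHIFT == 12): a target of
 * ti->len == 2097152 sectors spans 2097152 >> 3 == 262144 pblks,
 * which is 1 GiB in either unit.
 */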

/**************************************
 * Main functions
 **************************************/

static int
compress_read_header(struct compress* c)
{
	int ret = 0;
	struct page* pblkpage;
	u8* pblkbuf;
	struct page* iopagev[1];
	struct cbd_header header;

	pblkpage = cbd_alloc_page();
	if (!pblkpage) {
		return -ENOMEM;
	}
	pblkbuf = page_address(pblkpage);
	iopagev[0] = pblkpage;

	/* Only params.priv is needed to issue the read. */
	header.params.priv = c->dev->bdev;
	ret = pblk_read_wait(&header.params, 0, 1, iopagev);
	if (ret) {
		printk(KERN_ERR "%s: failed to read header\n", __func__);
		goto out;
	}
	memset(&header, 0, sizeof(header));
	cbd_header_get(pblkbuf, &header);
	header.params.priv = c->dev->bdev;

	if (memcmp(header.magic, CBD_MAGIC, sizeof(header.magic)) != 0) {
		printk(KERN_ERR "%s: bad magic\n", __func__);
		ret = -EINVAL;
		goto out;
	}
	if (header.version_major != CBD_VERSION_MAJOR) {
		printk(KERN_ERR "%s: bad major version\n", __func__);
		ret = -EINVAL;
		goto out;
	}
	if (header.version_minor != CBD_VERSION_MINOR) {
		printk(KERN_ERR "%s: bad minor version\n", __func__);
		ret = -EINVAL;
		goto out;
	}
	if (header.params.algorithm == CBD_ALG_NONE ||
	    header.params.algorithm >= CBD_ALG_MAX) {
		printk(KERN_ERR "%s: bad algorithm\n", __func__);
		ret = -EINVAL;
		goto out;
	}
#ifndef COMPRESS_HAVE_LZ4
	if (header.params.algorithm == CBD_ALG_LZ4) {
		printk(KERN_ERR "%s: algorithm lz4 is not built into kernel\n", __func__);
		ret = -EINVAL;
		goto out;
	}
#endif
#ifndef COMPRESS_HAVE_ZLIB
	if (header.params.algorithm == CBD_ALG_ZLIB) {
		printk(KERN_ERR "%s: algorithm zlib is not built into kernel\n", __func__);
		ret = -EINVAL;
		goto out;
	}
#endif
	if (header.params.compression < 1 || header.params.compression > 9) {
		printk(KERN_ERR "%s: bad compression\n", __func__);
		ret = -EINVAL;
		goto out;
	}
	if (header.params.lblk_shift < LBLK_SHIFT_MIN ||
	    header.params.lblk_shift > LBLK_SHIFT_MAX) {
		printk(KERN_ERR "%s: bad lblk_shift\n", __func__);
		ret = -EINVAL;
		goto out;
	}

	printk(KERN_INFO "%s: parameters...\n", __func__);
	printk(KERN_INFO "  algorithm=%hu\n", (unsigned short)header.params.algorithm);
	printk(KERN_INFO "  compression=%hu\n", (unsigned short)header.params.compression);
	printk(KERN_INFO "  pbat_len=%hu\n", (unsigned short)header.params.pbat_len);
	printk(KERN_INFO "  lblk_shift=%hu\n", (unsigned short)header.params.lblk_shift);
	printk(KERN_INFO "  nr_pblk=%lu\n", (unsigned long)header.params.nr_pblk);
	printk(KERN_INFO "  nr_zones=%u\n", (unsigned int)header.params.nr_zones);
	printk(KERN_INFO "  lblk_per_zone=%u\n", (unsigned int)header.params.lblk_per_zone);

	memcpy(&c->params, &header.params, sizeof(header.params));
	memcpy(&c->stats, &header.stats, sizeof(header.stats));

out:
	cbd_free_page(pblkpage);
	return ret;
}

static int
compress_write_header(struct compress* c)
{
	int ret = 0;
	struct page* pblkpage;
	u8* pblkbuf;
	struct cbd_header header;
	struct page* iopagev[1];

	pblkpage = cbd_alloc_page();
	if (!pblkpage) {
		return -ENOMEM;
	}
	pblkbuf = page_address(pblkpage);
	memset(&header, 0, sizeof(header));
	memcpy(header.magic, CBD_MAGIC, sizeof(header.magic));
	header.version_major = CBD_VERSION_MAJOR;
	header.version_minor = CBD_VERSION_MINOR;
	memcpy(&header.params, &c->params, sizeof(header.params));
	memcpy(&header.stats, &c->stats, sizeof(header.stats));
	cbd_header_put(pblkbuf, &header);
	iopagev[0] = pblkpage;
	ret = pblk_write_wait(&c->params, 0, 1, iopagev);
	if (ret) {
		printk(KERN_ERR "%s: failed to write header\n", __func__);
	}
	cbd_free_page(pblkpage);

	return ret;
}

static int
compress_read(struct compress *c, struct bio *bio)
{
	struct lbd* lbd = NULL;
	struct bio_vec bv;
	struct bvec_iter iter;
	/* Number of 512-byte sectors per logical block. */
	u32 lblk_per_sector = lblk_per_pblk(&c->params) * PBLK_PER_SECTOR;

	bio_for_each_segment(bv, bio, iter) {
		u64 lblk = iter.bi_sector / lblk_per_sector;
		u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
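		/*
		 * Worked example (illustrative, assuming 64 KiB logical
		 * blocks, so lblk_per_sector == 128): bi_sector 300 maps
		 * to lblk 300 / 128 == 2, at byte offset
		 * (300 - 2 * 128) * 512 == 22528 within that lblk.
		 */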
		unsigned long flags;
		char* data;

		lbd = lbdcache_get(c->lc, lblk);
		if (!lbd) {
			return -EIO;
		}
		data = bvec_kmap_irq(&bv, &flags);
		lbd_data_read(lbd, lblk_off, bv.bv_len, data);
		bvec_kunmap_irq(data, &flags);
		lbdcache_put(c->lc, lbd);
	}

	return 0;
}

static int
compress_write(struct compress *c, struct bio *bio)
{
	struct lbd* lbd = NULL;
	struct bio_vec bv;
	struct bvec_iter iter;
	/* Number of 512-byte sectors per logical block. */
	u32 lblk_per_sector = lblk_per_pblk(&c->params) * PBLK_PER_SECTOR;

	bio_for_each_segment(bv, bio, iter) {
		u64 lblk = iter.bi_sector / lblk_per_sector;
		u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
		unsigned long flags;
		char* data;

		lbd = lbdcache_get(c->lc, lblk);
		if (!lbd) {
			return -EIO;
		}
		data = bvec_kmap_irq(&bv, &flags);
		lbd_data_write(lbd, lblk_off, bv.bv_len, data);
		bvec_kunmap_irq(data, &flags);
		lbdcache_put(c->lc, lbd);
	}

	return 0;
}

static void
compress_io(struct compress_io* cio)
{
	int ret;
	struct compress* c = cio->c;
	struct bio* bio = cio->bio;

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		ret = compress_read(c, bio);
		break;
	case REQ_OP_WRITE:
		ret = compress_write(c, bio);
		break;
	default:
		printk(KERN_ERR "%s: unknown op in bio: %u\n", __func__, bio_op(bio));
		ret = -EINVAL;
	}
	if (ret) {
		printk(KERN_ERR "%s: failed, ret=%d\n", __func__, ret);
	}

	bio->bi_status = (ret == 0 ? BLK_STS_OK : BLK_STS_IOERR); /* XXX */
	bio_endio(bio);
}

#ifdef USE_WORKQUEUE
static void
compress_io_work(struct work_struct* work)
{
	struct compress_io* cio = container_of(work, struct compress_io, work);

	compress_io(cio);
}
#endif

/*
 * Usage:
 *   echo "<start_sector> <end_sector> compress <backing_device> <args...>" | dmsetup create <compress_name>
 * Where:
 *   start_sector is the starting sector of the backing device.
 *   end_sector is the ending sector of the backing device.
 *   compress is the name of this module.
 *   backing_device is the name of the backing device.
 *   args is one of:
 *     create [lblk_shift=#]
 *     open
 *   compress_name is the name of the compress device.
 */
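
/*
 * For example (illustrative values; the device node and sector count
 * are arbitrary), to map a 1 GiB backing device that already holds a
 * compress header:
 *
 *   echo "0 2097152 compress /dev/sdb1 open" | dmsetup create cbd0
 */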

static int
compress_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int ret;
	unsigned int argn;
	u32 cache_pages = 0;
	struct compress *c = NULL;
	u64 backing_nr_pblks;

	printk(KERN_INFO "%s: enter: argc=%u\n", __func__, argc);
	for (argn = 0; argn < argc; ++argn) {
		printk(KERN_INFO "  ... arg[%u]=\"%s\"\n", argn, argv[argn]);
	}
	if (argc == 0) {
		ti->error = "No device specified";
		return -EINVAL;
	}

	argn = 1;
	while (argn < argc) {
		const char* arg = argv[argn++];
		const char* val = NULL;
		const char* eq = strchr(arg, '=');
		int err;
		if (eq) {
			val = eq + 1;
		}
#if 0
		if (!strncmp(arg, "verbose", 7)) {
			err = kstrtouint(val, 0, &verbose_level);
			if (err) {
				ti->error = "Failed to parse verbose";
				return -EINVAL;
			}
			continue;
		}
#endif
		/* XXX: Parse suffixes */
		if (!strncmp(arg, "cache_pages", 11)) {
			if (!val) {
				ti->error = "Missing value for cache_pages";
				return -EINVAL;
			}
			err = kstrtouint(val, 0, &cache_pages);
			if (err) {
				ti->error = "Failed to parse cache_pages";
				return -EINVAL;
			}
			continue;
		}
		ti->error = "Unrecognized argument";
		return -EINVAL;
	}

	c = kzalloc(sizeof(struct compress), GFP_KERNEL);
	if (!c) {
		ti->error = "Failed to allocate target";
		return -ENOMEM;
	}

	if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev)) {
		ti->error = "Device lookup failed";
		kfree(c);
		return -EINVAL;
	}

	ti->private = c;

	backing_nr_pblks = blkdev_pblk_size(c->dev->bdev);

	if ((backing_nr_pblks >> 48) != 0) {
		ti->error = "Device too large";
		ret = -EINVAL;
		goto err;
	}

	if (!cache_pages) {
		/* Minimum of 1/1k RAM and 1/64k device size */
		cache_pages = min((unsigned int)(totalram_pages >> 10),
		                  (unsigned int)(backing_nr_pblks >> 16));
		if (cache_pages < 32 * 2 * num_online_cpus()) {
			cache_pages = 32 * 2 * num_online_cpus();
		}
	}
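	/*
	 * Worked example (illustrative figures): with 4 GiB of RAM
	 * (2^20 pages of 4 KiB) and a 64 GiB backing device (2^24 pblks
	 * of 4 KiB), the heuristic picks min(2^20 >> 10, 2^24 >> 16) ==
	 * min(1024, 256) == 256 cache pages, which on a 4-CPU machine
	 * is exactly the 32 * 2 * num_online_cpus() floor.
	 */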
	printk(KERN_INFO "%s: pages=%lu pblks=%lu cache_pages=%u\n",
	       __func__, totalram_pages, (unsigned long)backing_nr_pblks, cache_pages);

	ti->per_io_data_size = ALIGN(sizeof(struct compress_io), ARCH_KMALLOC_MINALIGN);

	ret = compress_read_header(c);
	if (ret) {
		goto err;
	}
	if (c->params.flags & CBD_FLAG_DIRTY) {
		printk(KERN_INFO "Warning: device was not properly closed\n");
	}
	if (dm_table_get_mode(ti->table) & FMODE_WRITE) {
		u16 save_flags = c->params.flags;
		c->params.flags |= CBD_FLAG_DIRTY;
		ret = compress_write_header(c);
		c->params.flags = save_flags;
		if (ret) {
			goto err;
		}
	}

	/* XXX: validate minimum pblk using zone_off(max_zone+1) */
	if (c->params.nr_pblk > backing_nr_pblks) {
		printk(KERN_ERR "%s: bad nr_pblk\n", __func__);
		ret = -EINVAL;
		goto err;
	}

	if (c->params.nr_zones > zone_for_pblk(&c->params, backing_nr_pblks)) {
		printk(KERN_ERR "%s: bad nr_zones\n", __func__);
		ret = -EINVAL;
		goto err;
	}

	/* XXX: validate lblk_per_zone */

	c->lc = kmalloc(lbdcache_size(), GFP_KERNEL);
	if (!c->lc) {
		printk(KERN_ERR "Failed to alloc lbdcache\n");
		ret = -ENOMEM;
		goto err;
	}
	if (!lbdcache_ctr(c->lc, &c->params, &c->stats, cache_pages)) {
		printk(KERN_ERR "Failed to init logical block cache\n");
		ret = -ENOMEM;
		goto err;
	}

	c->io_workq = alloc_workqueue("compress_io", WQ_HIGHPRI | WQ_MEM_RECLAIM, 1);
	if (!c->io_workq) {
		printk(KERN_ERR "%s: failed to alloc io_workq\n", __func__);
		ret = -ENOMEM;
		goto err;
	}

	printk(KERN_INFO "%s: success\n", __func__);

	return 0;

err:
	kfree(c->lc);	/* no-op if never allocated */
	dm_put_device(ti, c->dev);
	kfree(c);
	return ret;
}

static void
compress_dtr(struct dm_target *ti)
{
	int ret;
	struct compress *c;

	printk(KERN_INFO "%s: enter\n", __func__);

	c = ti->private;

	if (dm_table_get_mode(ti->table) & FMODE_WRITE) {
		/* A clean shutdown writes back the header without the
		   dirty flag that was set on disk in the constructor. */
		ret = compress_write_header(c);
		if (ret) {
			printk(KERN_INFO "Warning: failed to write header\n");
		}
	}
	lbdcache_dtr(c->lc);
	kfree(c->lc);
	if (c->io_workq) {
		destroy_workqueue(c->io_workq);
	}
	dm_put_device(ti, c->dev);
	kfree(c);
}

static int
compress_map(struct dm_target *ti, struct bio *bio)
{
	struct compress *c = ti->private;
	struct compress_io *cio;

	if (c->params.flags & CBD_FLAG_ERROR) {
		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
		return DM_MAPIO_SUBMITTED;	/* XXX: DM_MAPIO_KILL? */
	}

	/* from dm-crypt.c */
	if (unlikely(bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)) {
		bio_set_dev(bio, c->dev->bdev);
		if (bio_sectors(bio)) {
			/* XXX: remap to underlying data */
		}
		return DM_MAPIO_REMAPPED;
	}

	/* Synchronous I/O operations deadlock, so queue them. */
	/* XXX: clone the bio? */
	cio = dm_per_bio_data(bio, ti->per_io_data_size);
	cio->c = c;
	cio->bio = bio;
#ifdef USE_WORKQUEUE
	INIT_WORK(&cio->work, compress_io_work);
	queue_work(c->io_workq, &cio->work);
#else
	compress_io(cio);
#endif

	return DM_MAPIO_SUBMITTED;
}

static struct target_type compress_target = {
	.name    = "compress",
	.version = { 1, 0, 0 },
	.module  = THIS_MODULE,
	.ctr     = compress_ctr,
	.dtr     = compress_dtr,
	.map     = compress_map,
};

static int __init
dm_compress_init(void)
{
	int res;

	res = dm_register_target(&compress_target);
	if (res < 0) {
		printk(KERN_ERR "Failed to register dm-compress: %d\n", res);
	}

	return res;
}

static void __exit
dm_compress_exit(void)
{
	dm_unregister_target(&compress_target);
}

module_init(dm_compress_init);
module_exit(dm_compress_exit);

MODULE_DESCRIPTION("compress target for transparent compression");
MODULE_AUTHOR("Tom Marshall <tdm.code@gmail.com>");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");