cbd/dm-compress/dm-compress.c

/*
* Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>
// XXX: find a better name for this, something about storage vs. speed.
// XXX: should this be in cbd_params?
// #define CBD_DETECT_ZERO_BLOCKS
/*
* XXX
* If we don't use a workqueue, blkdev_pblk_io() stalls. Why?
*/
#define USE_WORKQUEUE 1
#define ZONE_NONE (u32)(~0)
#define LBLK_NONE (u64)(~0)
/* per bio private data */
struct dm_compress_io {
struct dm_compress* dc;
struct bio* bio;
struct work_struct work;
};
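/*
* Per-target state.  The zone pblk allocation bitmap, the zone lblk
* allocation table and the logical block below are each single-entry
* caches; access to them during I/O is serialized by io_lock.
*/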
struct dm_compress
{
struct dm_dev* dev;
struct cbd_params params;
bool io_failed; /* XXX: remove when debugging complete */
/* XXX: dm_target.off */
sector_t dm_off;
u8* lz4_wrkmem;
u8* lz4_cbuf;
/* Currently cached zone pblk alloc info (if any) */
u32 zone_pblk_alloc_idx;
bool zone_pblk_alloc_dirty;
void* zone_pblk_alloc;
/* Currently cached zone lblk alloc info (if any) */
u32 zone_lblk_alloc_idx;
void* zone_lblk_alloc;
/* Currently cached lblk data (if any) */
u64 lblk_num;
bool lblk_dirty;
struct lblk_alloc_elem* lblk_alloc;
void* lblk;
/* Queueing stuff */
struct workqueue_struct* io_queue;
struct mutex io_lock;
};
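/*
* Return 0 if the first len bytes of buf are all zero, nonzero otherwise.
* (A "memcmp against zero" helper used to detect all-zero logical blocks.)
*/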
static inline int
memcmpz(const void* buf, size_t len)
{
const char* end = (const char*)buf + len;
const char* p;
for (p = (const char*)buf; p < end; ++p) {
if (*p) {
return 1;
}
}
return 0;
}
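/* Size of the underlying block device in physical blocks. */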
static inline u64
blkdev_pblk_size(struct block_device *bdev)
{
return i_size_read(bdev->bd_inode) >> PBLK_SHIFT;
}
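/* Length of the dm target in physical blocks (ti->len is in 512-byte sectors). */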
static inline u64
dm_target_pblk_size(struct dm_target* ti)
{
return ti->len >> (PBLK_SHIFT - SECTOR_SHIFT);
}
/**************************************
* Core low-level I/O.
*
* pblk and count are in units of physical blocks (4096 bytes), NOT sectors.
* data is a page address (obtained via __get_free_pages and friends).
**************************************/
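/*
* Submit a single synchronous bio of `count` physical blocks starting at
* `pblk` and wait for it to complete.  `op` is REQ_OP_READ or REQ_OP_WRITE.
* Returns 0 on success or a negative errno.
*/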
static int
blkdev_pblk_io(struct block_device* dev, unsigned int op, u64 pblk, u32 count, void *data)
{
int ret;
unsigned long data_addr;
struct bio *bio;
data_addr = (unsigned long)data;
if (data_addr & (PAGE_SIZE-1)) {
printk(KERN_ERR " Not page aligned\n");
return -EINVAL;
}
if (!virt_addr_valid(data)) {
printk(KERN_ERR " Not valid address\n");
return -EINVAL;
}
bio = bio_alloc(GFP_KERNEL, count);
if (!bio) {
printk(KERN_ERR "%s: out of memory\n", __func__);
return -ENOMEM;
}
bio_set_dev(bio, dev);
bio->bi_iter.bi_sector = (pblk << (PBLK_SHIFT - SECTOR_SHIFT));
bio->bi_opf = op;
while (count--) {
struct page *page = virt_to_page(data);
if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) {
printk(KERN_ERR "%s: cannot add page to bio\n", __func__);
bio_put(bio);
return -EINVAL;
}
data = (u8*)data + PAGE_SIZE;
}
/*
* XXX: We would like to make writes asynchronous, but submit_bio() does
* not return a meaningful status.
*/
ret = submit_bio_wait(bio);
if (ret != 0) {
printk(KERN_ERR "%s: submit_bio failed\n", __func__);
bio_put(bio);
return ret;
}
bio_put(bio);
return ret;
}
static int
blkdev_pblk_read(struct block_device* dev, u64 pblk, u32 count, void *data)
{
return blkdev_pblk_io(dev, REQ_OP_READ, pblk, count, data);
}
static int
blkdev_pblk_write(struct block_device* dev, u64 pblk, u32 count, void *data)
{
return blkdev_pblk_io(dev, REQ_OP_WRITE, pblk, count, data);
}
/**************************************
* Zone pblk functions
**************************************/
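/* Write back the cached zone pblk allocation bitmap and clear its dirty flag. */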
static int
zone_pblk_alloc_write(struct dm_compress* dc)
{
int ret;
u64 pblk;
u32 count;
BUG_ON(dc->zone_pblk_alloc_idx == ZONE_NONE);
pblk = pblk_alloc_off(&dc->params, dc->zone_pblk_alloc_idx);
count = pblk_alloc_len(&dc->params);
ret = blkdev_pblk_write(dc->dev->bdev, pblk, count, dc->zone_pblk_alloc);
if (ret != 0) {
return ret;
}
dc->zone_pblk_alloc_dirty = false;
return 0;
}
static int
zone_pblk_alloc_flush(struct dm_compress* dc)
{
int ret;
if (dc->zone_pblk_alloc_dirty) {
ret = zone_pblk_alloc_write(dc);
if (ret) {
return ret;
}
}
return 0;
}
/* Read zone physical block alloc bitmap */
static int
zone_pblk_alloc_read(struct dm_compress* dc, u32 idx)
{
int ret;
u64 pblk;
u32 count;
if (dc->zone_pblk_alloc_idx == idx) {
return 0;
}
ret = zone_pblk_alloc_flush(dc);
if (ret != 0) {
return ret;
}
pblk = pblk_alloc_off(&dc->params, idx);
count = pblk_alloc_len(&dc->params);
ret = blkdev_pblk_read(dc->dev->bdev, pblk, count, dc->zone_pblk_alloc);
if (ret) {
return ret;
}
dc->zone_pblk_alloc_idx = idx;
return 0;
}
/*
* Get (allocate) one pblk from the currently cached zone pblk alloc bitmap.
* XXX: get rid of this function and use pblk_alloc directly in lblk_write().
*/
static u64
zone_pblk_alloc_get(struct dm_compress* dc, u32 zone_hint)
{
u32 zone_pblk_count = pblk_alloc_len(&dc->params) * PBLK_SIZE_BITS;
u32 zone;
u32 idx;
zone = zone_hint;
/* XXX: check both forward and backward */
do {
if (zone_pblk_alloc_read(dc, zone) != 0) {
printk(KERN_ERR " zone_pblk_alloc_read failed\n");
return 0;
}
idx = cbd_bitmap_alloc(dc->zone_pblk_alloc, zone_pblk_count);
if (idx != zone_pblk_count) {
dc->zone_pblk_alloc_dirty = true;
return zone_data_off(&dc->params, zone) + idx;
}
++zone;
if (zone == dc->params.nr_zones) {
zone = 0;
}
}
while (zone != zone_hint);
printk(KERN_ERR "%s: fail, all zones full\n", __func__);
return 0;
}
/*
* Put (free) one pblk into the currently cached zone pblk alloc bitmap.
* XXX: get rid of this function and use pblk_free directly in lblk_write().
*/
static int
zone_pblk_alloc_put(struct dm_compress* dc, u64 pblk)
{
u32 zone_pblk_count = pblk_alloc_len(&dc->params) * PBLK_SIZE_BITS;
u32 zone;
u32 idx;
int ret;
if (pblk < CBD_HEADER_BLOCKS) {
printk(KERN_ERR "%s: pblk index is in header\n", __func__);
return -EINVAL;
}
zone = (pblk - CBD_HEADER_BLOCKS) / zone_len(&dc->params);
if (zone >= dc->params.nr_zones) {
printk(KERN_ERR "%s: pblk zone out of bounds\n", __func__);
return -EINVAL;
}
if (pblk < zone_data_off(&dc->params, zone)) {
printk(KERN_ERR "%s: pblk index in metadata\n", __func__);
return -EINVAL;
}
idx = pblk - zone_data_off(&dc->params, zone);
if (idx >= zone_pblk_count) {
printk(KERN_ERR "%s: pblk index out of bounds\n", __func__);
return -EINVAL;
}
ret = zone_pblk_alloc_read(dc, zone);
if (ret != 0) {
return ret;
}
cbd_bitmap_free(dc->zone_pblk_alloc, idx);
dc->zone_pblk_alloc_dirty = true;
return 0;
}
/**************************************
* Zone lblk functions
**************************************/
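/*
* Write back the cached zone lblk allocation table.  There is no dirty
* flag for it; lblk_write() writes it back explicitly after updating an
* element.
*/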
static int
zone_lblk_alloc_write(struct dm_compress* dc)
{
int ret;
u64 pblk;
u32 count;
if (!dc->zone_lblk_alloc) {
printk(KERN_ERR "%s: zone_lblk_alloc is NULL\n", __func__);
return -EINVAL;
}
BUG_ON(dc->zone_lblk_alloc_idx == ZONE_NONE);
pblk = lblk_alloc_off(&dc->params, dc->zone_lblk_alloc_idx);
count = lblk_alloc_len(&dc->params);
ret = blkdev_pblk_write(dc->dev->bdev, pblk, count, dc->zone_lblk_alloc);
if (ret != 0) {
return ret;
}
return 0;
}
/*
* XXX: Another opportunity to choose speed vs. space: only allocate two
* pages for lblk_alloc_elem buffer instead of the entire lblk_alloc.
*/
static int
zone_lblk_alloc_read(struct dm_compress* dc, u32 idx)
{
int ret;
u64 pblk;
u32 count;
if (dc->zone_lblk_alloc_idx == idx) {
return 0;
}
pblk = lblk_alloc_off(&dc->params, idx);
count = lblk_alloc_len(&dc->params);
ret = blkdev_pblk_read(dc->dev->bdev, pblk, count, dc->zone_lblk_alloc);
if (ret == 0) {
dc->zone_lblk_alloc_idx = idx;
}
return ret;
}
/**************************************
* Logical block functions
**************************************/
/*
* Compress dc->lblk into dc->lz4_cbuf
*
* Returns number of bytes in cbuf or 0 for failure.
*/
static size_t
lblk_compress(struct dm_compress* dc)
{
int ret;
void *dbuf = dc->lblk;
u32 dlen = PBLK_SIZE * lblk_per_pblk(&dc->params);
void *cbuf = dc->lz4_cbuf;
u32 clen = PBLK_SIZE * lblk_per_pblk(&dc->params);
ret = LZ4_compress_default(dbuf, cbuf, dlen, clen, dc->lz4_wrkmem);
if (ret <= 0) {
return 0;
}
return (size_t)ret;
}
/*
* Decompress dc->lz4_cbuf of size clen into dc->lblk
*
* Returns 0 for success, <0 for failure.
*/
static int
lblk_decompress(struct dm_compress* dc, u32 clen)
{
int ret;
void *cbuf = dc->lz4_cbuf;
void *dbuf = dc->lblk;
u32 dlen = PBLK_SIZE * lblk_per_pblk(&dc->params);
ret = LZ4_decompress_safe(cbuf, dbuf, clen, dlen);
if (ret != dlen) {
printk(KERN_ERR "%s: failed, ret=%d (expected %u)\n", __func__, ret, (unsigned int)dlen);
return -EIO;
}
return 0;
}
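/*
* Write back the cached logical block: compress it (or store it raw if it
* does not compress), allocate or free pblks in its zone as needed, update
* its allocation element, and flush the zone metadata.
*/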
static int
lblk_write(struct dm_compress* dc)
{
int ret;
u32 zone;
u32 zone_lblk;
size_t d_len;
size_t c_len;
u8* c_buf;
u32 n;
u64 pblk;
zone = dc->lblk_num / dc->params.lblk_per_zone;
zone_lblk = dc->lblk_num - (zone * dc->params.lblk_per_zone);
/*
* We must have dc->zone_lblk_alloc and dc->lblk_alloc cached by
* the previous lblk_read().
*/
if (dc->zone_lblk_alloc_idx != zone) {
printk(KERN_ERR "*** lblk_alloc not cached: %lu vs %lu\n", (unsigned long)dc->zone_lblk_alloc_idx, (unsigned long)zone);
return -EIO;
}
BUG_ON(dc->zone_lblk_alloc_idx != zone);
d_len = PBLK_SIZE * lblk_per_pblk(&dc->params);
#ifdef CBD_DETECT_ZERO_BLOCKS
if (memcmpz(dc->lblk, d_len) == 0) {
#else
if (0) {
#endif
c_len = 0;
c_buf = NULL;
dc->lblk_alloc->len = 0;
}
else {
c_len = lblk_compress(dc);
if (c_len > 0) {
c_buf = dc->lz4_cbuf;
dc->lblk_alloc->len = c_len;
}
else {
c_len = d_len;
c_buf = dc->lblk;
dc->lblk_alloc->len = CBD_UNCOMPRESSED;
}
}
for (n = 0; n < lblk_per_pblk(&dc->params); ++n) {
if (c_len > PBLK_SIZE * n) {
pblk = dc->lblk_alloc->pblk[n];
if (!pblk) {
pblk = zone_pblk_alloc_get(dc, zone);
if (pblk == 0) {
printk(KERN_ERR " zone_pblk_alloc_get failed\n");
return -ENOSPC;
}
dc->lblk_alloc->pblk[n] = pblk;
}
ret = blkdev_pblk_write(dc->dev->bdev, pblk, 1, c_buf);
if (ret != 0) {
printk(KERN_ERR " blkdev_pblk_write failed\n");
return ret;
}
c_buf += PBLK_SIZE;
}
else {
pblk = dc->lblk_alloc->pblk[n];
if (pblk) {
dc->lblk_alloc->pblk[n] = 0;
ret = zone_pblk_alloc_put(dc, pblk);
if (ret != 0) {
printk(KERN_ERR " zone_pblk_alloc_put failed\n");
return ret;
}
}
}
}
lblk_alloc_elem_put(&dc->params,
dc->zone_lblk_alloc, zone_lblk, dc->lblk_alloc);
ret = zone_lblk_alloc_write(dc);
if (ret != 0) {
printk(KERN_ERR " zone_lblk_alloc_write failed\n");
return ret;
}
ret = zone_pblk_alloc_flush(dc);
if (ret != 0) {
printk(KERN_ERR " zone_pblk_alloc_flush failed\n");
return ret;
}
dc->lblk_dirty = false;
return 0;
}
static int
lblk_flush(struct dm_compress* dc)
{
int ret;
if (dc->lblk_dirty) {
ret = lblk_write(dc);
if (ret) {
return ret;
}
}
return 0;
}
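/*
* Make logical block `idx` the cached block: flush the currently cached
* block if it is dirty, then read and (if necessary) decompress the new one.
*/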
static int
lblk_read(struct dm_compress* dc, u64 idx)
{
int ret;
u32 zone;
u32 zone_lblk;
u32 c_len;
u64 pblk;
if (dc->lblk_num == idx) {
return 0;
}
ret = lblk_flush(dc);
if (ret) {
return ret;
}
zone = idx / dc->params.lblk_per_zone;
zone_lblk = idx - (zone * dc->params.lblk_per_zone);
ret = zone_lblk_alloc_read(dc, zone);
if (ret != 0) {
printk(KERN_ERR " zone_lblk_alloc_read failed\n");
return ret;
}
lblk_alloc_elem_get(&dc->params,
dc->zone_lblk_alloc, zone_lblk, dc->lblk_alloc);
c_len = dc->lblk_alloc->len;
if (c_len == 0) {
memset(dc->lblk, 0, PBLK_SIZE * lblk_per_pblk(&dc->params));
}
else {
bool is_compressed = true;
size_t d_len = PBLK_SIZE * lblk_per_pblk(&dc->params);
size_t n;
u8* p;
if (c_len == CBD_UNCOMPRESSED) {
is_compressed = false;
c_len = d_len;
}
p = dc->lz4_cbuf;
for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
pblk = dc->lblk_alloc->pblk[n];
BUG_ON(pblk == 0);
ret = blkdev_pblk_read(dc->dev->bdev, pblk, 1, p);
if (ret != 0) {
return ret;
}
}
if (is_compressed) {
if (lblk_decompress(dc, c_len) != 0) {
printk(KERN_ERR " decompress failed\n");
return -EIO;
}
}
else {
memcpy(dc->lblk, dc->lz4_cbuf, d_len);
}
}
dc->lblk_num = idx;
return 0;
}
/**************************************
* Main functions
**************************************/
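/*
* Allocate a physically contiguous, page-aligned, zeroed buffer suitable
* for use with blkdev_pblk_io().  Limited to 128 pages (order 7).
*/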
static void*
compress_alloc_pages(size_t size)
{
unsigned int order = get_order(size);
void* ret;
if (size > (PAGE_SIZE * 128) || order > 7) {
printk(KERN_ERR "%s: size %zu order %u too large\n", __func__, size, order);
return NULL;
}
ret = (void*)__get_free_pages(GFP_KERNEL, order);
if (!ret) {
printk(KERN_ERR "%s: failed to alloc %zu bytes\n", __func__, size);
return NULL;
}
memset(ret, 0, size);
return ret;
}
static void
compress_free_pages(void* ptr, size_t size)
{
unsigned int order = get_order(size);
size_t n;
size_t in_use = 0;
if (!ptr) {
return;
}
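/*
* Debug aid: warn and leak rather than free if any page still appears to
* be referenced (e.g. by an in-flight bio).  The first page carries the
* reference taken by the allocation itself, so discount that one.
*/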
for (n = 0; n < (1 << order); ++n) {
struct page* pg = virt_to_page(ptr + n * PAGE_SIZE);
int refcount = page_ref_count(pg);
if (n == 0) {
--refcount;
}
if (refcount) {
++in_use;
}
}
if (in_use) {
printk(KERN_ERR "%s: *** %zu of %zu pages in use ***\n", __func__, in_use, n);
return;
}
free_pages((unsigned long)ptr, order);
}
static void
compress_free_buffers(struct dm_compress* dc)
{
compress_free_pages(dc->lblk, PBLK_SIZE * lblk_per_pblk(&dc->params));
dc->lblk = NULL;
kfree(dc->lblk_alloc);
dc->lblk_alloc = NULL;
compress_free_pages(dc->zone_lblk_alloc, PBLK_SIZE * lblk_alloc_len(&dc->params));
dc->zone_lblk_alloc = NULL;
compress_free_pages(dc->zone_pblk_alloc, PBLK_SIZE * pblk_alloc_len(&dc->params));
dc->zone_pblk_alloc = NULL;
compress_free_pages(dc->lz4_cbuf, PBLK_SIZE * lblk_per_pblk(&dc->params));
dc->lz4_cbuf = NULL;
kfree(dc->lz4_wrkmem);
dc->lz4_wrkmem = NULL;
}
/*
* XXX: Many of the below (all except lz4 buffers) are used in bio operations
* and should be page aligned. We always get page aligned buffers because of
* the way kmalloc() works, but that is technically not guaranteed.
*/
static int
compress_alloc_buffers(struct dm_compress* dc)
{
dc->lz4_wrkmem = kmalloc(LZ4_compressBound(PBLK_SIZE * lblk_per_pblk(&dc->params)), GFP_KERNEL);
if (!dc->lz4_wrkmem) {
printk(KERN_ERR "%s: Failed to alloc lz4_wrkmem\n", __func__);
goto out_nomem;
}
dc->lz4_cbuf = compress_alloc_pages(PBLK_SIZE * lblk_per_pblk(&dc->params));
if (!dc->lz4_cbuf) {
printk(KERN_ERR "%s: Failed to alloc lz4_cmem\n", __func__);
goto out_nomem;
}
dc->zone_pblk_alloc_idx = ZONE_NONE;
dc->zone_pblk_alloc_dirty = false;
dc->zone_pblk_alloc = compress_alloc_pages(PBLK_SIZE * pblk_alloc_len(&dc->params));
if (!dc->zone_pblk_alloc) {
printk(KERN_ERR "%s: Failed to alloc zone_pblk_alloc\n", __func__);
goto out_nomem;
}
dc->zone_lblk_alloc_idx = ZONE_NONE;
dc->zone_lblk_alloc = compress_alloc_pages(PBLK_SIZE * lblk_alloc_len(&dc->params));
if (!dc->zone_lblk_alloc) {
printk(KERN_ERR "%s: Failed to alloc zone_lblk_alloc\n", __func__);
goto out_nomem;
}
dc->lblk_num = LBLK_NONE;
dc->lblk_dirty = false;
dc->lblk_alloc = kmalloc(offsetof(struct lblk_alloc_elem, pblk[lblk_per_pblk(&dc->params)]), GFP_KERNEL);
if (!dc->lblk_alloc) {
printk(KERN_ERR "%s: Failed to alloc lblk_alloc\n", __func__);
goto out_nomem;
}
dc->lblk = compress_alloc_pages(PBLK_SIZE * lblk_per_pblk(&dc->params));
if (!dc->lblk) {
printk(KERN_ERR "%s: Failed to alloc lblk\n", __func__);
goto out_nomem;
}
return 0;
out_nomem:
compress_free_buffers(dc);
return -ENOMEM;
}
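/*
* Read and validate the on-disk cbd header, cache its parameters, and set
* up the working buffers, the I/O workqueue and the I/O lock.
*/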
static int
compress_open(struct dm_compress* dc, u64 dev_nr_pblks)
{
int err;
u8 *pblkbuf;
struct cbd_header header;
u64 max_nr_zones;
pblkbuf = kmalloc(PBLK_SIZE, GFP_KERNEL);
if (!pblkbuf) {
return -ENOMEM;
}
err = blkdev_pblk_read(dc->dev->bdev, 0, 1, pblkbuf);
if (err) {
printk(KERN_ERR "%s: failed to read header\n", __func__);
goto out;
}
cbd_header_get(pblkbuf, &header);
if (memcmp(header.magic, CBD_MAGIC, sizeof(header.magic)) != 0) {
printk(KERN_ERR "%s: bad magic\n", __func__);
err = -EINVAL;
goto out;
}
if (header.version_major != CBD_VERSION_MAJOR) {
printk(KERN_ERR "%s: bad version\n", __func__);
err = -EINVAL;
goto out;
}
if (header.version_minor != CBD_VERSION_MINOR) {
printk(KERN_ERR "%s: bad version\n", __func__);
err = -EINVAL;
goto out;
}
if (header.params.lblk_shift < LBLK_SHIFT_MIN ||
header.params.lblk_shift > LBLK_SHIFT_MAX) {
printk(KERN_ERR "%s: bad lblk_shift\n", __func__);
err = -EINVAL;
goto out;
}
/* XXX: validate minimum pblk using zone_off(max_zone+1) */
if (header.params.nr_pblk > dev_nr_pblks) {
printk(KERN_ERR "%s: bad nr_pblk\n", __func__);
err = -EINVAL;
goto out;
}
max_nr_zones = (dev_nr_pblks - CBD_HEADER_BLOCKS) / zone_len(&header.params);
if (header.params.nr_zones > max_nr_zones) {
printk(KERN_ERR "%s: bad nr_zones\n", __func__);
err = -EINVAL;
goto out;
}
/* XXX: validate lblk_per_zone */
printk(KERN_INFO "%s: parameters...\n", __func__);
printk(KERN_INFO " algorithm=%hu\n", (unsigned short)header.params.algorithm);
printk(KERN_INFO " compression=%hu\n", (unsigned short)header.params.compression);
printk(KERN_INFO " lblk_shift=%hu\n", (unsigned short)header.params.lblk_shift);
printk(KERN_INFO " nr_pblk=%lu\n", (unsigned long)header.params.nr_pblk);
printk(KERN_INFO " nr_zones=%u\n", (unsigned int)header.params.nr_zones);
printk(KERN_INFO " lblk_per_zone=%u\n", (unsigned int)header.params.lblk_per_zone);
memcpy(&dc->params, &header.params, sizeof(header.params));
err = compress_alloc_buffers(dc);
if (err) {
printk(KERN_ERR "%s: failed to alloc buffers\n", __func__);
goto out;
}
dc->io_queue = alloc_workqueue("kcompress_io", WQ_HIGHPRI | WQ_MEM_RECLAIM, 1);
if (!dc->io_queue) {
printk(KERN_ERR "%s: failed to alloc io_queue\n", __func__);
compress_free_buffers(dc);
err = -ENOMEM;
goto out;
}
mutex_init(&dc->io_lock);
out:
kfree(pblkbuf);
return err;
}
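/*
* Handle a read bio: for each segment, load the logical block that backs
* it into the cache and copy the requested bytes out.  Note that
* lblk_per_sector is, despite its name, used as the number of 512-byte
* sectors spanned by one logical block.
*/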
static int
compress_read(struct dm_compress *dc, struct bio *bio)
{
struct bio_vec bv;
struct bvec_iter iter;
int ret;
u32 lblk_per_sector = lblk_per_pblk(&dc->params) * PBLK_PER_SECTOR;
u32 lblk_len = lblk_per_sector * SECTOR_SIZE;
bio_for_each_segment(bv, bio, iter) {
sector_t lblk = iter.bi_sector / lblk_per_sector;
u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
unsigned long flags;
char* data;
/* Ensure the data is within the logical block */
if (lblk_off + bv.bv_len > lblk_len) {
printk(KERN_ERR "%s: logical block bounds exceeded\n", __func__);
return -EIO;
}
/* BUG_ON(lblk_off + bv.bv_offset + bv.bv_len > PBLK_SIZE + lblk_per_pblk(dc)); */
ret = lblk_read(dc, lblk);
if (ret) {
return ret;
}
data = bvec_kmap_irq(&bv, &flags);
memcpy(data, dc->lblk + lblk_off, bv.bv_len);
bvec_kunmap_irq(data, &flags);
}
return 0;
}
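/*
* Handle a write bio: read-modify-write the cached logical block for each
* segment, then flush the block so the data and metadata reach the backing
* device before the bio is completed.
*/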
static int
compress_write(struct dm_compress *dc, struct bio *bio)
{
struct bio_vec bv;
struct bvec_iter iter;
int ret;
u32 lblk_per_sector = lblk_per_pblk(&dc->params) * PBLK_PER_SECTOR;
u32 lblk_len = lblk_per_sector * SECTOR_SIZE;
bio_for_each_segment(bv, bio, iter) {
sector_t lblk = iter.bi_sector / lblk_per_sector;
u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
unsigned long flags;
char* data;
/* Ensure the data is within the logical block */
if (lblk_off + bv.bv_len > lblk_len) {
printk(KERN_ERR "%s logical block bounds exceeded\n", __func__);
printk(KERN_ERR " sector=%lu\n", (unsigned long)iter.bi_sector);
printk(KERN_ERR " bv_len=%u bv_offset=%u\n", bv.bv_len, bv.bv_offset);
printk(KERN_ERR " lblk=%lu lblk_off=%u\n", (unsigned long)lblk, lblk_off);
return -EIO;
}
/* BUG_ON(lblk_off + bv.bv_offset + bv.bv_len > PBLK_SIZE + lblk_per_pblk(dc)); */
ret = lblk_read(dc, lblk);
if (ret) {
return ret;
}
data = bvec_kmap_irq(&bv, &flags);
memcpy(dc->lblk + lblk_off, data, bv.bv_len);
bvec_kunmap_irq(data, &flags);
dc->lblk_dirty = true;
}
ret = lblk_flush(dc);
if (ret) {
return ret;
}
return 0;
}
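/*
* Per-bio worker.  All reads and writes are serialized under io_lock and
* performed synchronously; the bio is completed here with bio_endio().
*/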
static void compress_io(struct dm_compress_io* io)
{
int ret;
struct dm_compress* dc = io->dc;
struct bio* bio = io->bio;
if (dc->io_failed) {
bio->bi_status = BLK_STS_IOERR; /* XXX */
bio_endio(bio);
return;
}
mutex_lock(&dc->io_lock);
switch (bio_op(bio)) {
case REQ_OP_READ:
ret = compress_read(dc, bio);
break;
case REQ_OP_WRITE:
ret = compress_write(dc, bio);
break;
default:
printk(KERN_ERR "%s: unknown op in bio: %u\n", __func__, bio_op(bio));
ret = -EINVAL;
}
if (ret) {
printk(KERN_ERR "%s: failed, ret=%d\n", __func__, ret);
dc->io_failed = true;
}
mutex_unlock(&dc->io_lock);
bio->bi_status = (ret == 0 ? BLK_STS_OK : BLK_STS_IOERR); /* XXX */
bio_endio(bio);
}
#ifdef USE_WORKQUEUE
static void
compress_io_work(struct work_struct *work)
{
struct dm_compress_io *io = container_of(work, struct dm_compress_io, work);
compress_io(io);
}
#endif
/*
* Usage:
* echo "<start_sector> <end_sector> compress <backing_device> <args...>" | dmsetup create <compress_name>
* Where:
* start_sector is the starting sector of the backing device.
* end_sector is the ending sector of the backing device.
* compress is the name of this module.
* backing_device is the name backing device.
* args is:
* create [lblk_shift=#]
* open
* compress_name is the name of the compress device.
*/
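/*
* Example (illustrative only; the device path and name are placeholders,
* and the backing device must already contain a valid cbd header):
*
*   echo "0 $(blockdev --getsz /dev/sdX) compress /dev/sdX" | dmsetup create cbd0
*/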
static int
compress_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
int err;
unsigned int argn;
struct dm_compress *dc = NULL;
u64 dev_nr_pblks;
printk(KERN_INFO "%s: enter: argc=%u\n", __func__, argc);
for (argn = 0; argn < argc; ++argn) {
printk(KERN_INFO " ... arg[%u]=\"%s\"\n", argn, argv[argn]);
}
if (argc == 0) {
ti->error = "No device specified";
return -EINVAL;
}
argn = 1;
while (argn < argc) {
const char* arg = argv[argn++];
const char* eq = strchr(arg, '=');
if (!eq) {
ti->error = "Invalid argument format";
return -EINVAL;
}
#if 0
if (!memcmp(arg, "verbose", 7)) {
err = kstrtouint(eq + 1, 0, &verbose_level);
if (err) {
ti->error = "Failed to parse verbose";
return -EINVAL;
}
continue;
}
#endif
ti->error = "Unrecognized argument";
return -EINVAL;
}
dc = kzalloc(sizeof(struct dm_compress), GFP_KERNEL);
if (!dc) {
ti->error = "Failed to allocate target";
return -ENOMEM;
}
if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dc->dev)) {
ti->error = "Device lookup failed";
kfree(dc);
return -EINVAL;
}
dc->dm_off = ti->begin;
ti->private = dc;
dev_nr_pblks = dm_target_pblk_size(ti);
if (get_order(dev_nr_pblks) >= 48) {
ti->error = "Device too large";
kfree(dc);
return -EINVAL;
}
ti->per_io_data_size = ALIGN(sizeof(struct dm_compress_io), ARCH_KMALLOC_MINALIGN);
err = compress_open(dc, dev_nr_pblks);
if (err) {
dm_put_device(ti, dc->dev);
kfree(dc);
return err;
}
printk(KERN_INFO "%s: success\n", __func__);
return 0;
}
static void
compress_dtr(struct dm_target *ti)
{
struct dm_compress *dc;
printk(KERN_INFO "%s: enter\n", __func__);
dc = (struct dm_compress *)ti->private;
/* Drain and destroy the workqueue before freeing the buffers it may touch. */
if (dc->io_queue) {
destroy_workqueue(dc->io_queue);
}
compress_free_buffers(dc);
dm_put_device(ti, dc->dev);
kfree(dc);
}
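/*
* dm map callback.  Empty flushes and discards are remapped straight to
* the backing device; everything else is queued to the single-threaded
* workqueue, since doing synchronous I/O here can deadlock.
*/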
static int
compress_map(struct dm_target *ti, struct bio *bio)
{
struct dm_compress *dc = (struct dm_compress *)ti->private;
struct dm_compress_io *io;
/* from dm-crypt.c */
if (unlikely(bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)) {
bio_set_dev(bio, dc->dev->bdev);
if (bio_sectors(bio)) {
/* XXX: remap to underlying data */
}
return DM_MAPIO_REMAPPED;
}
/* Synchronous I/O operations deadlock, so queue them. */
/* XXX: clone the bio? */
io = dm_per_bio_data(bio, ti->per_io_data_size);
io->dc = dc;
io->bio = bio;
#ifdef USE_WORKQUEUE
INIT_WORK(&io->work, compress_io_work);
queue_work(dc->io_queue, &io->work);
#else
compress_io(io);
#endif
return DM_MAPIO_SUBMITTED;
}
static struct target_type compress_target = {
.name = "compress",
.version = { 1, 0, 0 },
.module = THIS_MODULE,
.ctr = compress_ctr,
.dtr = compress_dtr,
.map = compress_map,
};
static int __init
dm_compress_init(void)
{
int res;
res = dm_register_target(&compress_target);
if (res < 0) {
printk(KERN_ERR "Failed to register dm-compress: %d\n", res);
}
return res;
}
static void __exit
dm_compress_exit(void)
{
dm_unregister_target(&compress_target);
}
module_init(dm_compress_init);
module_exit(dm_compress_exit);
MODULE_DESCRIPTION("compress target for transparent compression");
MODULE_AUTHOR("Tom Marshall <tdm.code@gmail.com>");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");