cbd/dm-compress/dm-compress.c


/*
* Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>
// XXX: find a better name for this, something about storage vs. speed.
// XXX: should this be in cbd_params?
// #define CBD_DETECT_ZERO_BLOCKS
/*
* XXX
* If we don't use a workqueue, blkdev_pblk_io() stalls. Why?
*/
#define USE_WORKQUEUE 1
#define ZONE_NONE (u32)(~0)
#define PBLK_NONE (u64)(~0)
#define LBLK_NONE (u64)(~0)
/* per bio private data */
struct dm_compress_io {
struct dm_compress* dc;
struct bio* bio;
struct work_struct work;
};
struct zone_cache {
u32 zone;
struct mutex lock;
/* Compression working memory */
u8* lz4_wrkmem;
u8* lz4_cbuf;
/* Currently cached zone pblk alloc info (if any) */
u32 pblk_alloc_idx;
bool pblk_alloc_dirty;
void* pblk_alloc;
/* Currently cached zone lblk alloc info (if any) */
u64 lblk_alloc_pblk;
u32 lblk_alloc_len;
void* lblk_alloc;
u64 lblk_alloc_elem_lblk;
struct lblk_alloc_elem* lblk_alloc_elem;
/* Currently cached lblk data (if any) */
u64 lblk_num;
bool lblk_dirty;
void* lblk;
};
struct dm_compress
{
struct dm_dev* dev;
bool io_failed;
struct cbd_params params;
struct mutex zc_lock;
unsigned int nr_zc;
struct zone_cache* zcache;
/* Queueing stuff */
struct workqueue_struct* io_queue;
};
/* Forward decls */
static struct zone_cache* zone_cache_get(struct dm_compress*, u32);
static int zone_cache_put(struct dm_compress*, struct zone_cache*);
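/* Return nonzero if any byte in the buffer is nonzero, zero if it is all zeroes. */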
static inline int
memcmpz(const void* buf, size_t len)
{
const char* end = (const char*)buf + len;
const char* p;
for (p = (const char*)buf; p < end; ++p) {
if (*p) {
return 1;
}
}
return 0;
}
static inline u64
blkdev_pblk_size(struct block_device *bdev)
{
return i_size_read(bdev->bd_inode) >> PBLK_SHIFT;
}
static inline u64
dm_target_pblk_size(struct dm_target* ti)
{
return ti->len >> (PBLK_SHIFT - SECTOR_SHIFT);
}
/*************************************
* Page level memory allocator
**************************************/
static void*
compress_alloc_pages(size_t size)
{
unsigned int order = get_order(size);
void* ret;
if (size > (PAGE_SIZE * 128) || order > 7) {
printk(KERN_ERR "%s: size %zu order %u too large\n", __func__, size, order);
return NULL;
}
ret = (void*)__get_free_pages(GFP_KERNEL, order);
if (!ret) {
printk(KERN_ERR "%s: failed to alloc %zu bytes\n", __func__, size);
return NULL;
}
memset(ret, 0, size);
return ret;
}
static void
compress_free_pages(void* ptr, size_t size)
{
unsigned int order = get_order(size);
size_t n;
size_t in_use = 0;
if (!ptr) {
return;
}
for (n = 0; n < (1 << order); ++n) {
struct page* pg = virt_to_page(ptr + n * PAGE_SIZE);
int refcount = page_ref_count(pg);
if (n == 0) {
--refcount;
}
if (refcount) {
++in_use;
}
}
if (in_use) {
printk(KERN_ERR "%s: *** %zu of %zu pages in use ***\n", __func__, in_use, n);
return;
}
free_pages((unsigned long)ptr, order);
}
/**************************************
* Core low-level I/O.
*
* pblk and count are in units of physical blocks (4096 bytes), NOT sectors.
* data is a page address (obtained via __get_free_pages and friends).
**************************************/
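/*
* Build a bio covering count pblks starting at pblk. The data buffer must be
* a page allocation (e.g. from compress_alloc_pages) so each page can be
* attached with bio_add_page(). The caller submits and releases the bio.
*/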
static struct bio*
blkdev_pblk_io_prepare(struct dm_compress* dc, unsigned int op, u64 pblk, u32 count, void *data)
{
unsigned long data_addr;
struct bio* bio;
data_addr = (unsigned long)data;
BUG_ON(data_addr & (PAGE_SIZE-1));
BUG_ON(!virt_addr_valid(data));
bio = bio_alloc(GFP_KERNEL, count);
if (!bio) {
printk(KERN_ERR "%s: out of memory\n", __func__);
return NULL;
}
bio_set_dev(bio, dc->dev->bdev);
bio->bi_opf = op;
bio->bi_iter.bi_sector = (pblk << (PBLK_SHIFT - SECTOR_SHIFT));
while (count--) {
struct page *page = virt_to_page(data);
if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) {
BUG();
}
data = (u8*)data + PAGE_SIZE;
}
return bio;
}
static int
blkdev_pblk_read(struct dm_compress* dc, u64 pblk, u32 count, void *data)
{
int ret;
struct bio* bio;
bio = blkdev_pblk_io_prepare(dc, REQ_OP_READ, pblk, count, data);
if (!bio) {
printk(KERN_ERR "%s: out of memory\n", __func__);
return -ENOMEM;
}
ret = submit_bio_wait(bio);
if (ret != 0) {
printk(KERN_ERR "%s: submit_bio_wait failed: %d\n", __func__, ret);
}
bio_put(bio);
return ret;
}
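/*
* Write completion: release the bounce pages handed off by blkdev_pblk_write()
* and record any I/O error on the target.
*/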
static void
blkdev_pblk_write_endio(struct bio* bio)
{
void* data = page_address(bio->bi_io_vec[0].bv_page);
unsigned int count = bio->bi_max_vecs;
compress_free_pages(data, count * PBLK_SIZE);
if (bio->bi_status != BLK_STS_OK) {
struct dm_compress* dc = bio->bi_private;
dc->io_failed = true;
}
bio_put(bio);
}
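/*
* Asynchronous pblk write. Ownership of the page-allocated data buffer passes
* to the bio; it is freed in blkdev_pblk_write_endio().
*/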
static void
blkdev_pblk_write(struct dm_compress* dc, u64 pblk, u32 count, void *data)
{
struct bio* bio;
bio = blkdev_pblk_io_prepare(dc, REQ_OP_WRITE, pblk, count, data);
if (!bio) {
printk(KERN_ERR "%s: out of memory\n", __func__);
return;
}
bio->bi_end_io = blkdev_pblk_write_endio;
bio->bi_private = dc;
submit_bio(bio);
}
/**************************************
* Zone pblk functions
**************************************/
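/* Write the cached zone pblk allocation bitmap back to disk via a bounce buffer. */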
static int
pblk_alloc_write(struct dm_compress* dc, struct zone_cache* zc)
{
u64 pblk;
u32 count;
void* pg;
BUG_ON(zc->pblk_alloc_idx == ZONE_NONE);
pblk = pblk_alloc_off(&dc->params, zc->pblk_alloc_idx);
count = pblk_alloc_len(&dc->params);
pg = compress_alloc_pages(count * PBLK_SIZE);
if (!pg) {
return -ENOMEM;
}
memcpy(pg, zc->pblk_alloc, count * PBLK_SIZE);
blkdev_pblk_write(dc, pblk, count, pg);
zc->pblk_alloc_dirty = false;
return 0;
}
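/* Write back the cached pblk allocation bitmap if it has been modified. */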
static int
pblk_alloc_flush(struct dm_compress* dc, struct zone_cache* zc)
{
int ret = 0;
if (zc->pblk_alloc_dirty) {
ret = pblk_alloc_write(dc, zc);
}
return ret;
}
/* Read zone physical block alloc bitmap */
static int
pblk_alloc_read(struct dm_compress* dc, struct zone_cache* zc)
{
int ret;
u64 pblk;
u32 count;
if (zc->pblk_alloc_idx == zc->zone) {
return 0;
}
ret = pblk_alloc_flush(dc, zc);
if (ret) {
return ret;
}
pblk = pblk_alloc_off(&dc->params, zc->zone);
count = pblk_alloc_len(&dc->params);
ret = blkdev_pblk_read(dc, pblk, count, zc->pblk_alloc);
if (ret) {
return ret;
}
zc->pblk_alloc_idx = zc->zone;
return 0;
}
/*
* Get (allocate) one pblk from the currently cached zone pblk alloc bitmap.
* XXX: get rid of this function and use pblk_alloc directly in lblk_write().
*/
static u64
pblk_alloc_get(struct dm_compress* dc, struct zone_cache* zc_hint)
{
u32 zone_pblk_count = pblk_alloc_len(&dc->params) * PBLK_SIZE_BITS;
struct zone_cache* zc;
u32 zone;
u32 idx;
zc = zc_hint;
zone = zc->zone;
/* XXX: check both forward and backward */
do {
if (pblk_alloc_read(dc, zc) != 0) {
printk(KERN_ERR " pblk_alloc_read failed\n");
return 0;
}
idx = cbd_bitmap_alloc(zc->pblk_alloc, zone_pblk_count);
if (idx != zone_pblk_count) {
zc->pblk_alloc_dirty = true;
if (zc != zc_hint) {
zone_cache_put(dc, zc);
}
return zone_data_off(&dc->params, zone) + idx;
}
++zone;
if (zone == dc->params.nr_zones) {
zone = 0;
}
zc = zone_cache_get(dc, zone);
}
while (zc != zc_hint);
printk(KERN_ERR "%s: fail, all zones full\n", __func__);
return 0;
}
/*
* Put (free) one pblk into the currently cached zone pblk alloc bitmap.
* XXX: get rid of this function and use pblk_free directly in lblk_write().
*/
static int
pblk_alloc_put(struct dm_compress* dc, struct zone_cache* zc, u64 pblk)
{
u32 zone_pblk_count = pblk_alloc_len(&dc->params) * PBLK_SIZE_BITS;
bool put_zone = false;
u32 zone;
u32 idx;
int ret;
if (pblk < CBD_HEADER_BLOCKS) {
printk(KERN_ERR "%s: pblk index is in header\n", __func__);
return -EINVAL;
}
zone = (pblk - CBD_HEADER_BLOCKS) / zone_len(&dc->params);
if (zone >= dc->params.nr_zones) {
printk(KERN_ERR "%s: pblk zone out of bounds\n", __func__);
return -EINVAL;
}
if (pblk < zone_data_off(&dc->params, zone)) {
printk(KERN_ERR "%s: pblk index in metadata\n", __func__);
return -EINVAL;
}
idx = pblk - zone_data_off(&dc->params, zone);
if (idx >= zone_pblk_count) {
printk(KERN_ERR "%s: pblk index out of bounds\n", __func__);
return -EINVAL;
}
if (zone != zc->zone) {
zc = zone_cache_get(dc, zone);
put_zone = true;
}
ret = pblk_alloc_read(dc, zc);
if (ret) {
goto out_put;
}
cbd_bitmap_free(zc->pblk_alloc, idx);
zc->pblk_alloc_dirty = true;
out_put:
if (put_zone) {
zone_cache_put(dc, zc);
}
return ret;
}
/**************************************
* Zone lblk functions
**************************************/
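/*
* Write the cached lblk allocation element back into the zone's lblk alloc
* area and queue the affected pblk(s) for writing.
*/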
static int
lblk_alloc_elem_write(struct dm_compress* dc, struct zone_cache* zc)
{
u32 zone;
u32 zone_lblk;
u32 elem_off;
u32 elem_end;
u32 rel_pblk;
u32 count;
u64 pblk;
u8* buf;
void* pg;
BUG_ON(zc->lblk_alloc_elem_lblk == LBLK_NONE);
BUG_ON(zc->lblk_alloc_pblk == PBLK_NONE);
BUG_ON(zc->lblk_alloc_len == 0);
2019-10-09 23:41:13 +02:00
zone = zc->lblk_alloc_elem_lblk / dc->params.lblk_per_zone;
zone_lblk = zc->lblk_alloc_elem_lblk - (zc->zone * dc->params.lblk_per_zone);
elem_off = lblk_alloc_elem_len(&dc->params) * zone_lblk;
elem_end = elem_off + lblk_alloc_elem_len(&dc->params);
rel_pblk = elem_off / PBLK_SIZE;
count = zc->lblk_alloc_len;
pblk = zc->lblk_alloc_pblk;
buf = zc->lblk_alloc + (elem_off - rel_pblk * PBLK_SIZE);
lblk_alloc_elem_put(&dc->params, buf, zc->lblk_alloc_elem);
pg = compress_alloc_pages(count * PBLK_SIZE);
if (!pg) {
return -ENOMEM;
}
memcpy(pg, zc->lblk_alloc, count * PBLK_SIZE);
blkdev_pblk_write(dc, pblk, count, pg);
return 0;
}
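/*
* Load the allocation element for lblk into the zone cache, reading the
* backing pblk(s) first if they are not already cached.
*/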
static int
lblk_alloc_elem_read(struct dm_compress* dc, struct zone_cache* zc, u64 lblk)
{
int ret;
u32 zone;
u32 zone_lblk;
u32 elem_off;
u32 elem_end;
u32 rel_pblk;
u32 count;
u64 pblk;
u8* buf;
if (zc->lblk_alloc_elem_lblk == lblk) {
return 0;
}
zone = lblk / dc->params.lblk_per_zone;
zone_lblk = lblk - (zone * dc->params.lblk_per_zone);
elem_off = lblk_alloc_elem_len(&dc->params) * zone_lblk;
elem_end = elem_off + lblk_alloc_elem_len(&dc->params);
rel_pblk = elem_off / PBLK_SIZE;
count = 1 + (elem_end - 1) / PBLK_SIZE - (elem_off / PBLK_SIZE);
pblk = lblk_alloc_off(&dc->params, zone) + rel_pblk;
if (zc->lblk_alloc_pblk != pblk || zc->lblk_alloc_len < count) {
ret = blkdev_pblk_read(dc, pblk, count, zc->lblk_alloc);
if (ret != 0) {
return ret;
}
zc->lblk_alloc_pblk = pblk;
zc->lblk_alloc_len = count;
}
buf = zc->lblk_alloc + (elem_off - rel_pblk * PBLK_SIZE);
lblk_alloc_elem_get(&dc->params, buf, zc->lblk_alloc_elem);
zc->lblk_alloc_elem_lblk = lblk;
return 0;
}
/**************************************
* Logical block functions
**************************************/
/*
* Compress dc->lblk into dc->lz4_cbuf
*
* Returns number of bytes in cbuf or 0 for failure.
*/
static size_t
lblk_compress(struct cbd_params* params, struct zone_cache* zc)
{
int ret;
void *dbuf = zc->lblk;
u32 dlen = PBLK_SIZE * lblk_per_pblk(params);
void *cbuf = zc->lz4_cbuf;
u32 clen = PBLK_SIZE * lblk_per_pblk(params);
ret = LZ4_compress_default(dbuf, cbuf, dlen, clen, zc->lz4_wrkmem);
if (ret <= 0) {
return 0;
}
return (size_t)ret;
}
/*
* Decompress dc->lz4_cbuf of size clen into dc->lblk
*
* Returns 0 for success, <0 for failure.
*/
static int
lblk_decompress(struct cbd_params* params, struct zone_cache* zc, u32 clen)
{
int ret;
void *cbuf = zc->lz4_cbuf;
void *dbuf = zc->lblk;
u32 dlen = PBLK_SIZE * lblk_per_pblk(params);
ret = LZ4_decompress_safe(cbuf, dbuf, clen, dlen);
if (ret != dlen) {
printk(KERN_ERR "%s: failed, ret=%d (expected %u)\n", __func__, ret, (unsigned int)dlen);
return -1;
}
return 0;
}
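/*
* Compress the cached logical block and write it out: allocate or free pblks
* as the compressed size requires, then update the allocation element and
* flush the pblk bitmap.
*/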
static int
lblk_write(struct dm_compress* dc, struct zone_cache* zc)
{
int ret;
u32 zone;
u32 zone_lblk;
u8* elem_buf;
size_t d_len;
size_t c_len;
u8* c_buf;
u32 n;
u64 pblk;
zone = zc->zone;
zone_lblk = zc->lblk_num - (zone * dc->params.lblk_per_zone);
elem_buf = zc->lblk_alloc + zone_lblk * lblk_alloc_elem_len(&dc->params);
/* We must have a cached lblk elem */
BUG_ON(zc->lblk_alloc_elem_lblk == LBLK_NONE);
d_len = PBLK_SIZE * lblk_per_pblk(&dc->params);
#ifdef CBD_DETECT_ZERO_BLOCKS
if (memcmpz(zc->lblk, d_len) == 0) {
#else
if (0) {
#endif
c_len = 0;
c_buf = NULL;
zc->lblk_alloc_elem->len = 0;
}
else {
c_len = lblk_compress(&dc->params, zc);
if (c_len > 0) {
size_t c_blkrem = c_len % PBLK_SIZE;
if (c_blkrem) {
memset(zc->lz4_cbuf + c_len, 0, c_blkrem);
}
c_buf = zc->lz4_cbuf;
zc->lblk_alloc_elem->len = c_len;
}
else {
c_len = d_len;
c_buf = zc->lblk;
zc->lblk_alloc_elem->len = CBD_UNCOMPRESSED;
}
}
for (n = 0; n < lblk_per_pblk(&dc->params); ++n) {
if (c_len > PBLK_SIZE * n) {
void* pg;
pblk = zc->lblk_alloc_elem->pblk[n];
if (!pblk) {
pblk = pblk_alloc_get(dc, zc);
if (pblk == 0) {
printk(KERN_ERR " pblk_alloc_get failed\n");
return -ENOSPC;
}
zc->lblk_alloc_elem->pblk[n] = pblk;
}
pg = compress_alloc_pages(PBLK_SIZE);
if (!pg) {
return -ENOMEM;
}
memcpy(pg, c_buf, PBLK_SIZE);
blkdev_pblk_write(dc, pblk, 1, pg);
c_buf += PBLK_SIZE;
}
else {
pblk = zc->lblk_alloc_elem->pblk[n];
if (pblk) {
zc->lblk_alloc_elem->pblk[n] = 0;
ret = pblk_alloc_put(dc, zc, pblk);
if (ret != 0) {
printk(KERN_ERR " pblk_alloc_put failed\n");
return ret;
}
}
}
}
ret = lblk_alloc_elem_write(dc, zc);
if (ret != 0) {
printk(KERN_ERR " lblk_alloc_elem_write failed\n");
return ret;
}
ret = pblk_alloc_flush(dc, zc);
if (ret != 0) {
printk(KERN_ERR " pblk_alloc_flush failed\n");
return ret;
}
zc->lblk_dirty = false;
return 0;
}
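/* Write back the cached logical block if it is dirty. */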
static int
lblk_flush(struct dm_compress* dc, struct zone_cache* zc)
{
int ret;
if (zc->lblk_dirty) {
ret = lblk_write(dc, zc);
if (ret) {
return ret;
}
}
return 0;
}
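/*
* Make logical block idx the cached lblk: flush the previous one, read its
* allocation element, then read and decompress the pblks backing it. An
* element length of zero reads as all zeroes.
*/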
static int
lblk_read(struct dm_compress* dc, struct zone_cache* zc, u64 idx)
{
int ret;
u32 zone;
u32 zone_lblk;
u8* elem_buf;
u32 c_len;
u64 pblk;
if (zc->lblk_num == idx) {
return 0;
}
ret = lblk_flush(dc, zc);
if (ret) {
return ret;
}
zone = idx / dc->params.lblk_per_zone;
zone_lblk = idx - (zone * dc->params.lblk_per_zone);
elem_buf = zc->lblk_alloc + zone_lblk * lblk_alloc_elem_len(&dc->params);
ret = lblk_alloc_elem_read(dc, zc, idx);
if (ret != 0) {
printk(KERN_ERR " lblk_alloc_elem_read failed\n");
return ret;
}
c_len = zc->lblk_alloc_elem->len;
if (c_len == 0) {
memset(zc->lblk, 0, PBLK_SIZE * lblk_per_pblk(&dc->params));
}
else {
bool is_compressed = true;
size_t d_len = PBLK_SIZE * lblk_per_pblk(&dc->params);
size_t n;
u8* p;
if (c_len == CBD_UNCOMPRESSED) {
is_compressed = false;
c_len = d_len;
}
p = zc->lz4_cbuf;
for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
pblk = zc->lblk_alloc_elem->pblk[n];
BUG_ON(pblk == 0);
ret = blkdev_pblk_read(dc, pblk, 1, p);
if (ret != 0) {
return ret;
}
}
if (is_compressed) {
if (lblk_decompress(&dc->params, zc, c_len) != 0) {
printk(KERN_ERR " decompress failed\n");
return -1;
}
}
else {
memcpy(zc->lblk, zc->lz4_cbuf, d_len);
}
}
zc->lblk_num = idx;
return 0;
}
/**************************************
* Zone cache functions
**************************************/
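/* Reset a zone cache entry to own the given zone with nothing cached yet. */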
static void
zone_cache_reset(struct zone_cache* zc, u32 zone)
{
zc->zone = zone;
zc->pblk_alloc_idx = ZONE_NONE;
zc->pblk_alloc_dirty = false;
zc->lblk_alloc_pblk = PBLK_NONE;
zc->lblk_alloc_len = 0;
zc->lblk_alloc_elem_lblk = LBLK_NONE;
zc->lblk_num = LBLK_NONE;
zc->lblk_dirty = false;
}
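/* Write back any dirty lblk data and allocation bitmap held by this entry. */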
static int
zone_cache_flush(struct dm_compress* dc, struct zone_cache* zc)
{
int ret;
ret = lblk_flush(dc, zc);
if (ret) {
return ret;
}
ret = pblk_alloc_flush(dc, zc);
if (ret) {
return ret;
}
return 0;
}
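/*
* Acquire a locked cache entry for the given zone: reuse a matching entry,
* claim an unused one, or steal an idle one. Returns NULL if all entries are busy.
*/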
static struct zone_cache*
zone_cache_get(struct dm_compress* dc, u32 zone)
{
struct zone_cache* zc;
u32 idx;
//printk(KERN_INFO "%s: zone=%u\n", __func__, (unsigned int)zone);
mutex_lock(&dc->zc_lock);
for (idx = 0; idx < dc->nr_zc; ++idx) {
zc = &dc->zcache[idx];
if (zc->zone == zone) {
mutex_lock(&zc->lock);
goto out;
}
}
for (idx = 0; idx < dc->nr_zc; ++idx) {
zc = &dc->zcache[idx];
if (zc->zone == ZONE_NONE) {
zone_cache_reset(zc, zone);
mutex_lock(&zc->lock);
goto out;
}
}
for (idx = 0; idx < dc->nr_zc; ++idx) {
zc = &dc->zcache[idx];
if (mutex_trylock(&zc->lock) == 1) {
zone_cache_reset(zc, zone);
goto out;
}
}
printk(KERN_ERR "%s: Cannot get zone %u\n", __func__, (unsigned int)zone);
zc = NULL;
out:
mutex_unlock(&dc->zc_lock);
return zc;
}
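/* Flush and unlock a cache entry obtained from zone_cache_get(). */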
static int
zone_cache_put(struct dm_compress* dc, struct zone_cache* zc)
{
int ret;
//printk(KERN_INFO "%s: zone=%u\n", __func__, (unsigned int)zc->zone);
ret = zone_cache_flush(dc, zc);
mutex_unlock(&zc->lock);
return ret;
}
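/* Free all buffers owned by a zone cache entry. */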
static void
zone_cache_dtr(struct dm_compress* dc, struct zone_cache* zc)
{
compress_free_pages(zc->lblk, PBLK_SIZE * lblk_per_pblk(&dc->params));
zc->lblk = NULL;
kfree(zc->lblk_alloc_elem);
zc->lblk_alloc_elem = NULL;
compress_free_pages(zc->lblk_alloc, PBLK_SIZE * 2);
zc->lblk_alloc = NULL;
compress_free_pages(zc->pblk_alloc, PBLK_SIZE * pblk_alloc_len(&dc->params));
zc->pblk_alloc = NULL;
compress_free_pages(zc->lz4_cbuf, PBLK_SIZE * lblk_per_pblk(&dc->params));
zc->lz4_cbuf = NULL;
kfree(zc->lz4_wrkmem);
zc->lz4_wrkmem = NULL;
}
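/*
* Allocate the per-entry working buffers: LZ4 scratch memory, compressed data
* buffer, allocation bitmap/element buffers and the decompressed lblk buffer.
*/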
static int
zone_cache_ctr(struct dm_compress* dc, struct zone_cache* zc)
{
zc->zone = ZONE_NONE;
mutex_init(&zc->lock);
zc->lz4_wrkmem = kmalloc(LZ4_compressBound(PBLK_SIZE * lblk_per_pblk(&dc->params)), GFP_KERNEL);
if (!zc->lz4_wrkmem) {
printk(KERN_ERR "%s: Failed to alloc lz4_wrkmem\n", __func__);
goto out_nomem;
}
zc->lz4_cbuf = compress_alloc_pages(PBLK_SIZE * lblk_per_pblk(&dc->params));
if (!zc->lz4_cbuf) {
printk(KERN_ERR "%s: Failed to alloc lz4_cmem\n", __func__);
goto out_nomem;
}
zc->pblk_alloc_idx = ZONE_NONE;
zc->pblk_alloc_dirty = false;
zc->pblk_alloc = compress_alloc_pages(PBLK_SIZE * pblk_alloc_len(&dc->params));
if (!zc->pblk_alloc) {
printk(KERN_ERR "%s: Failed to alloc pblk_alloc\n", __func__);
goto out_nomem;
}
zc->lblk_alloc_pblk = PBLK_NONE;
zc->lblk_alloc_len = 0;
zc->lblk_alloc = compress_alloc_pages(PBLK_SIZE * 2);
if (!zc->lblk_alloc) {
printk(KERN_ERR "%s: Failed to alloc lblk_alloc\n", __func__);
goto out_nomem;
}
zc->lblk_alloc_elem_lblk = LBLK_NONE;
zc->lblk_alloc_elem = kmalloc(offsetof(struct lblk_alloc_elem, pblk[lblk_per_pblk(&dc->params)]), GFP_KERNEL);
if (!zc->lblk_alloc_elem) {
printk(KERN_ERR "%s: Failed to alloc lblk_alloc_elem\n", __func__);
goto out_nomem;
}
zc->lblk_num = LBLK_NONE;
zc->lblk_dirty = false;
zc->lblk = compress_alloc_pages(PBLK_SIZE * lblk_per_pblk(&dc->params));
if (!zc->lblk) {
printk(KERN_ERR "%s: Failed to alloc lblk\n", __func__);
goto out_nomem;
}
return 0;
out_nomem:
zone_cache_dtr(dc, zc);
return -ENOMEM;
}
/**************************************
* Main functions
**************************************/
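/*
* Read and validate the cbd header from the backing device, then set up the
* zone caches and the I/O workqueue.
*/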
static int
compress_open(struct dm_compress* dc, u64 dev_nr_pblks)
{
int err;
u8 *pblkbuf;
struct cbd_header header;
u64 max_nr_zones;
unsigned int n;
pblkbuf = kmalloc(PBLK_SIZE, GFP_KERNEL);
if (!pblkbuf) {
return -ENOMEM;
}
err = blkdev_pblk_read(dc, 0, 1, pblkbuf);
if (err) {
printk(KERN_ERR "%s: failed to read header\n", __func__);
goto out;
}
cbd_header_get(pblkbuf, &header);
if (memcmp(header.magic, CBD_MAGIC, sizeof(header.magic)) != 0) {
printk(KERN_ERR "%s: bad magic\n", __func__);
err = -EINVAL;
goto out;
}
if (header.version_major != CBD_VERSION_MAJOR) {
printk(KERN_ERR "%s: bad version\n", __func__);
err = -EINVAL;
goto out;
}
if (header.version_minor != CBD_VERSION_MINOR) {
printk(KERN_ERR "%s: bad version\n", __func__);
err = -EINVAL;
goto out;
}
if (header.params.lblk_shift < LBLK_SHIFT_MIN ||
header.params.lblk_shift > LBLK_SHIFT_MAX) {
printk(KERN_ERR "%s: bad lblk_shift\n", __func__);
err = -EINVAL;
goto out;
}
/* XXX: validate minimum pblk using zone_off(max_zone+1) */
if (header.params.nr_pblk > dev_nr_pblks) {
printk(KERN_ERR "%s: bad nr_pblk\n", __func__);
err = -EINVAL;
goto out;
}
max_nr_zones = (dev_nr_pblks - CBD_HEADER_BLOCKS) / zone_len(&header.params);
if (header.params.nr_zones > max_nr_zones) {
printk(KERN_ERR "%s: bad nr_zones\n", __func__);
err = -EINVAL;
goto out;
}
/* XXX: validate lblk_per_zone */
printk(KERN_INFO "%s: parameters...\n", __func__);
printk(KERN_INFO " algorithm=%hu\n", (unsigned short)header.params.algorithm);
printk(KERN_INFO " compression=%hu\n", (unsigned short)header.params.compression);
printk(KERN_INFO " lblk_shift=%hu\n", (unsigned short)header.params.lblk_shift);
printk(KERN_INFO " nr_pblk=%lu\n", (unsigned long)header.params.nr_pblk);
printk(KERN_INFO " nr_zones=%u\n", (unsigned int)header.params.nr_zones);
printk(KERN_INFO " lblk_per_zone=%u\n", (unsigned int)header.params.lblk_per_zone);
memcpy(&dc->params, &header.params, sizeof(header.params));
mutex_init(&dc->zc_lock);
dc->nr_zc = min(2 * num_online_cpus(), dc->params.nr_zones);
dc->zcache = kmalloc(dc->nr_zc * sizeof(struct zone_cache), GFP_KERNEL);
if (!dc->zcache) {
printk(KERN_ERR "%s: out of memory\n", __func__);
err = -ENOMEM;
goto out;
}
for (n = 0; n < dc->nr_zc; ++n) {
err = zone_cache_ctr(dc, &dc->zcache[n]);
if (err) {
printk(KERN_ERR "%s: failed to init zone cache\n", __func__);
goto out;
}
}
dc->io_queue = alloc_workqueue("kcompress_io", WQ_HIGHPRI | WQ_MEM_RECLAIM, 1);
if (!dc->io_queue) {
printk(KERN_ERR "%s: failed to alloc io_queue\n", __func__);
err = -ENOMEM;
goto out;
}
out:
/* XXX: cleanup on error */
kfree(pblkbuf);
return err;
}
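/*
* Handle a read bio: for each segment, load the covering logical block into
* its zone cache and copy the requested bytes out.
*/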
static int
compress_read(struct dm_compress *dc, struct bio *bio)
{
struct bio_vec bv;
struct bvec_iter iter;
int ret;
u32 lblk_per_sector = lblk_per_pblk(&dc->params) * PBLK_PER_SECTOR;
u32 lblk_len = lblk_per_sector * SECTOR_SIZE;
bio_for_each_segment(bv, bio, iter) {
sector_t lblk = iter.bi_sector / lblk_per_sector;
u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
u32 zone = lblk / dc->params.lblk_per_zone;
struct zone_cache* zc = NULL;
unsigned long flags;
char* data;
zc = zone_cache_get(dc, zone);
if (!zc) {
return -EIO;
}
/* Ensure the data is within the logical block */
if (lblk_off + bv.bv_len > lblk_len) {
printk(KERN_ERR "%s: logical block bounds exceeded\n", __func__);
zone_cache_put(dc, zc);
return -EIO;
}
/* BUG_ON(lblk_off + bv.bv_offset + bv.bv_len > PBLK_SIZE + lblk_per_pblk(dc)); */
ret = lblk_read(dc, zc, lblk);
if (ret) {
zone_cache_put(dc, zc);
return ret;
}
data = bvec_kmap_irq(&bv, &flags);
memcpy(data, zc->lblk + lblk_off, bv.bv_len);
bvec_kunmap_irq(data, &flags);
zone_cache_put(dc, zc);
}
return 0;
}
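/*
* Handle a write bio: for each segment, load the covering logical block, copy
* the new bytes in and mark it dirty; it is compressed and written back when
* the zone cache entry is flushed.
*/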
static int
compress_write(struct dm_compress *dc, struct bio *bio)
{
struct bio_vec bv;
struct bvec_iter iter;
int ret;
u32 lblk_per_sector = lblk_per_pblk(&dc->params) * PBLK_PER_SECTOR;
u32 lblk_len = lblk_per_sector * SECTOR_SIZE;
bio_for_each_segment(bv, bio, iter) {
sector_t lblk = iter.bi_sector / lblk_per_sector;
u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
u32 zone = lblk / dc->params.lblk_per_zone;
struct zone_cache* zc = NULL;
unsigned long flags;
char* data;
zc = zone_cache_get(dc, zone);
if (!zc) {
return -EIO;
}
/* Ensure the data is within the logical block */
if (lblk_off + bv.bv_len > lblk_len) {
printk(KERN_ERR "%s: logical block bounds exceeded\n", __func__);
printk(KERN_ERR " sector=%lu\n", (unsigned long)iter.bi_sector);
printk(KERN_ERR " bv_len=%u bv_offset=%u\n", bv.bv_len, bv.bv_offset);
printk(KERN_ERR " lblk=%lu lblk_off=%u\n", (unsigned long)lblk, lblk_off);
zone_cache_put(dc, zc);
return -EIO;
}
/* BUG_ON(lblk_off + bv.bv_offset + bv.bv_len > PBLK_SIZE + lblk_per_pblk(dc)); */
ret = lblk_read(dc, zc, lblk);
if (ret) {
zone_cache_put(dc, zc);
return ret;
}
data = bvec_kmap_irq(&bv, &flags);
memcpy(zc->lblk + lblk_off, data, bv.bv_len);
bvec_kunmap_irq(data, &flags);
zc->lblk_dirty = true;
zone_cache_put(dc, zc);
}
return 0;
}
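/* Per-bio worker: dispatch to the read or write handler and complete the bio. */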
static void compress_io(struct dm_compress_io* io)
{
int ret;
struct dm_compress* dc = io->dc;
struct bio* bio = io->bio;
switch (bio_op(bio)) {
case REQ_OP_READ:
ret = compress_read(dc, bio);
break;
case REQ_OP_WRITE:
ret = compress_write(dc, bio);
break;
default:
printk(KERN_ERR "%s: unknown op in bio: %u\n", __func__, bio_op(bio));
ret = -EINVAL;
}
if (ret) {
printk(KERN_ERR "%s: failed, ret=%d\n", __func__, ret);
}
bio->bi_status = (ret == 0 ? BLK_STS_OK : BLK_STS_IOERR); /* XXX */
bio_endio(bio);
}
#ifdef USE_WORKQUEUE
static void
compress_io_work(struct work_struct *work)
{
struct dm_compress_io *io = container_of(work, struct dm_compress_io, work);
compress_io(io);
}
#endif
/*
* Usage:
* echo "<start_sector> <end_sector> compress <backing_device> <args...>" | dmsetup create <compress_name>
* Where:
* start_sector is the starting sector of the backing device.
* end_sector is the ending sector of the backing device.
* compress is the name of this module.
* backing_device is the name of the backing device.
* args is:
* create [lblk_shift=#]
* open
* compress_name is the name of the compress device.
*/
static int
compress_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
int err;
unsigned int argn;
struct dm_compress *dc = NULL;
u64 dev_nr_pblks;
printk(KERN_INFO "%s: enter: argc=%u\n", __func__, argc);
for (argn = 0; argn < argc; ++argn) {
printk(KERN_INFO " ... arg[%u]=\"%s\"\n", argn, argv[argn]);
}
if (argc == 0) {
ti->error = "No device specified";
return -EINVAL;
}
argn = 1;
while (argn < argc) {
const char* arg = argv[argn++];
const char* eq = strchr(arg, '=');
if (!eq) {
ti->error = "Invalid argument format";
return -EINVAL;
}
#if 0
if (!memcmp(arg, "verbose", 7)) {
err = kstrtouint(eq + 1, 0, &verbose_level);
if (err) {
ti->error = "Failed to parse verbose";
return -EINVAL;
}
continue;
}
#endif
ti->error = "Unrecognized argument";
return -EINVAL;
}
dc = kzalloc(sizeof(struct dm_compress), GFP_KERNEL);
if (!dc) {
ti->error = "Failed to allocate target";
return -ENOMEM;
}
if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dc->dev)) {
ti->error = "Device lookup failed";
kfree(dc);
return -EINVAL;
}
ti->private = dc;
dev_nr_pblks = dm_target_pblk_size(ti);
if (get_order(dev_nr_pblks) >= 48) {
ti->error = "Device too large";
kfree(dc);
return -EINVAL;
}
ti->per_io_data_size = ALIGN(sizeof(struct dm_compress_io), ARCH_KMALLOC_MINALIGN);
err = compress_open(dc, dev_nr_pblks);
if (err) {
dm_put_device(ti, dc->dev);
kfree(dc);
return err;
}
printk(KERN_INFO "%s: success\n", __func__);
return 0;
}
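/* Tear down the target: release the zone caches, the workqueue and the backing device. */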
static void
compress_dtr(struct dm_target *ti)
{
struct dm_compress *dc;
unsigned int n;
printk(KERN_INFO "%s: enter\n", __func__);
dc = (struct dm_compress *)ti->private;
if (dc->zcache) {
for (n = 0; n < dc->nr_zc; ++n) {
zone_cache_dtr(dc, &dc->zcache[n]);
}
kfree(dc->zcache);
}
if (dc->io_queue) {
destroy_workqueue(dc->io_queue);
}
dm_put_device(ti, dc->dev);
kfree(dc);
}
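/*
* Map function: remap flush/discard bios straight to the backing device and
* queue all other I/O to the worker.
*/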
static int
compress_map(struct dm_target *ti, struct bio *bio)
{
struct dm_compress *dc = (struct dm_compress *)ti->private;
struct dm_compress_io *io;
if (dc->io_failed) {
return DM_MAPIO_KILL;
}
/* from dm-crypt.c */
if (unlikely(bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)) {
bio_set_dev(bio, dc->dev->bdev);
if (bio_sectors(bio)) {
/* XXX: remap to underlying data */
}
return DM_MAPIO_REMAPPED;
}
/* Synchronous I/O operations deadlock, so queue them. */
/* XXX: clone the bio? */
io = dm_per_bio_data(bio, ti->per_io_data_size);
io->dc = dc;
io->bio = bio;
#ifdef USE_WORKQUEUE
INIT_WORK(&io->work, compress_io_work);
queue_work(dc->io_queue, &io->work);
#else
compress_io(io);
#endif
return DM_MAPIO_SUBMITTED;
}
static struct target_type compress_target = {
.name = "compress",
.version = { 1, 0, 0 },
.module = THIS_MODULE,
.ctr = compress_ctr,
.dtr = compress_dtr,
.map = compress_map,
};
static int __init
dm_compress_init(void)
{
int res;
res = dm_register_target(&compress_target);
if (res < 0) {
printk(KERN_ERR "Failed to register dm-compress: %d\n", res);
}
return res;
}
static void __exit
dm_compress_exit(void)
{
dm_unregister_target(&compress_target);
}
module_init(dm_compress_init);
module_exit(dm_compress_exit);
MODULE_DESCRIPTION("compress target for transparent compression");
MODULE_AUTHOR("Tom Marshall <tdm.code@gmail.com>");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");