cbd/dm-compress/lbd.c

/*
* Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>
struct lbd {
u64 lblk;
struct mutex reflock;
unsigned int ref;
struct mutex lock;
enum cache_state state;
struct cbd_params* params;
struct lbatviewcache* lvc;
struct lbatview* lv;
u8* lz4_wrkmem;
struct page* lz4_cpages;
u8* lz4_cbuf;
struct page* pages;
u8* buf;
};
/*
* Allocating lz4_wrkmem percpu:
*
* If the alloc is per-instance, it would need to be allocated in compress.c
* and passed around. The easiest way to pass it around is likely to make it
* part of a struct. We can't use struct compress because that is private.
* So we would need to create a struct (say, compress_percpu).
*
* If the alloc is global, we can just declare it file-local. But it would
* need to be the largest possible size. Which means we probably don't want
* to use alloc_percpu_gfp() directly, because 1 MiB chunks are probably not
* that common. So suppose we allocate a percpu vector of page ptrs.
*
* #define COMPRESS_MAX_INPUT_SIZE (1 << LBLK_SHIFT_MAX)
* #define COMPRESS_LZ4_BOUND LZ4_COMPRESSBOUND(COMPRESS_MAX_INPUT_SIZE)
* #define WRKMEM_PAGES DIV_ROUND_UP(COMPRESS_LZ4_BOUND, PAGE_SIZE)
* typedef struct page* lz4_wrkmem_pagev_t[WRKMEM_PAGES];
*
* g_lz4_wrkmem = alloc_percpu_gfp(lz4_wrkmem_pagev_t, GFP_KERNEL);
*
* That's not bad at all. But how do we alloc (and free) the actual pages?
*
* pagev = *get_cpu_ptr(g_lz4_wrkmem);
* ... use pagev ...
* put_cpu_ptr(g_lz4_wrkmem);
*
* free_percpu(g_lz4_wrkmem);
*/
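/*
* To answer the last question above, here is one possible shape for the
* page alloc/free half. This is only a sketch: the helper names
* lz4_wrkmem_init()/lz4_wrkmem_exit() and the variable g_lz4_wrkmem are
* illustrative, not existing code, and it assumes the pages are set up
* once for all possible CPUs at module init:
*
* static lz4_wrkmem_pagev_t __percpu* g_lz4_wrkmem;
*
* static void lz4_wrkmem_exit(void)
* {
*     int cpu;
*     unsigned int n;
*     struct page** pagev;
*
*     for_each_possible_cpu(cpu) {
*         pagev = *per_cpu_ptr(g_lz4_wrkmem, cpu);
*         for (n = 0; n < WRKMEM_PAGES; ++n) {
*             if (pagev[n]) {
*                 __free_page(pagev[n]);
*             }
*         }
*     }
*     free_percpu(g_lz4_wrkmem);
* }
*
* static int lz4_wrkmem_init(void)
* {
*     int cpu;
*     unsigned int n;
*     struct page** pagev;
*
*     g_lz4_wrkmem = alloc_percpu_gfp(lz4_wrkmem_pagev_t, GFP_KERNEL);
*     if (!g_lz4_wrkmem) {
*         return -ENOMEM;
*     }
*     for_each_possible_cpu(cpu) {
*         pagev = *per_cpu_ptr(g_lz4_wrkmem, cpu);
*         for (n = 0; n < WRKMEM_PAGES; ++n) {
*             pagev[n] = alloc_page(GFP_KERNEL);
*             if (!pagev[n]) {
*                 lz4_wrkmem_exit();
*                 return -ENOMEM;
*             }
*         }
*     }
*     return 0;
* }
*
* Percpu memory from alloc_percpu_gfp() starts zeroed, so
* lz4_wrkmem_exit() can safely unwind a partially filled vector.
*/
/*
* Test whether the logical block data is all zeros. Only effective when
* CBD_DETECT_ZERO_BLOCKS is defined; otherwise this always returns
* false and zero blocks are stored like any other data.
*/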
static inline bool
lblk_is_zeros(struct cbd_params* params, struct lbd* lbd)
{
#ifdef CBD_DETECT_ZERO_BLOCKS
u32 off;
u32 len = PBLK_SIZE * lblk_per_pblk(params);
for (off = 0; off < len; ++off) {
if (lbd->buf[off]) {
return false;
}
}
return true;
#else
return false;
#endif
}
/*
* Compress lbd->buf into lbd->lz4_cbuf
*
* Returns number of bytes in cbuf or 0 for failure.
*/
static size_t
lblk_compress(struct lbd* lbd)
{
int ret;
void *dbuf = lbd->buf;
u32 dlen = PBLK_SIZE * lblk_per_pblk(lbd->params);
void *cbuf = lbd->lz4_cbuf;
u32 clen = PBLK_SIZE * lblk_per_pblk(lbd->params);
ret = LZ4_compress_default(dbuf, cbuf, dlen, clen, lbd->lz4_wrkmem);
if (ret <= 0) {
return 0;
}
return (size_t)ret;
}
/*
* Decompress lbd->lz4_cbuf of size clen into lbd->buf
*
* Returns 0 for success, <0 for failure.
*/
static int
lblk_decompress(struct lbd* lbd, u32 clen)
{
int ret;
void *cbuf = lbd->lz4_cbuf;
void *dbuf = lbd->buf;
u32 dlen = PBLK_SIZE * lblk_per_pblk(lbd->params);
ret = LZ4_decompress_safe(cbuf, dbuf, clen, dlen);
if (ret != dlen) {
printk(KERN_ERR "%s: failed, ret=%d (expected %u)\n", __func__, ret, (unsigned int)dlen);
return -1;
}
return 0;
}
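/*
* Initialize an lbd object: store references to the shared params and
* lbatview cache, and allocate the LZ4 scratch memory plus one buffer
* each for compressed and uncompressed data.
*
* Returns false if any allocation fails.
*/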
bool
lbd_ctr(struct lbd* lbd,
struct cbd_params* params,
struct lbatviewcache* lvc)
{
memset(lbd, 0, sizeof(struct lbd));
lbd->lblk = LBLK_NONE;
mutex_init(&lbd->reflock);
lbd->ref = 0;
mutex_init(&lbd->lock);
lbd->state = CACHE_STATE_UNCACHED;
lbd->params = params;
lbd->lvc = lvc;
lbd->lv = NULL;
/* LZ4_compress_default() requires LZ4_MEM_COMPRESS bytes of wrkmem */
lbd->lz4_wrkmem = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
if (!lbd->lz4_wrkmem) {
return false;
}
lbd->lz4_cpages = cbd_alloc_pages(lblk_per_pblk(lbd->params));
if (!lbd->lz4_cpages) {
return false;
}
lbd->lz4_cbuf = page_address(lbd->lz4_cpages);
lbd->pages = cbd_alloc_pages(lblk_per_pblk(lbd->params));
if (!lbd->pages) {
return false;
}
lbd->buf = page_address(lbd->pages);
return true;
}
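/*
* Tear down an lbd object: drop the lbatview reference (if any) and
* free the buffers allocated by lbd_ctr().
*/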
void
lbd_dtr(struct lbd* lbd)
{
if (lbatviewcache_put(lbd->lvc, lbd->lv) != 0) {
printk(KERN_ERR "%s: lbatviewcache_put failed\n", __func__);
}
lbd->lv = NULL;
cbd_free_pages(lbd->pages, lblk_per_pblk(lbd->params));
lbd->pages = NULL;
lbd->buf = NULL;
cbd_free_pages(lbd->lz4_cpages, lblk_per_pblk(lbd->params));
lbd->lz4_cpages = NULL;
lbd->lz4_cbuf = NULL;
kfree(lbd->lz4_wrkmem);
lbd->lz4_wrkmem = NULL;
}
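/*
* Write completion callback: free the bounce page attached to the bio
* and flag the lbd as errored if the write failed.
*/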
static void
lbd_flush_endio(struct bio* bio)
{
struct lbd* lbd = bio->bi_private;
int ret;
cbd_free_page(bio->bi_io_vec[0].bv_page);
ret = pblk_endio(bio);
if (ret) {
printk(KERN_ERR "%s: I/O failed\n", __func__);
lbd->state = CACHE_STATE_ERROR;
}
}
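/*
* Write back a dirty lbd. The data is compressed (or, optionally,
* recognized as all zeros), the on-disk element is resized through the
* lbatview, and each physical block is written asynchronously via a
* bounce page. If compression does not reduce the size, the data is
* stored uncompressed. The lbatview reference is dropped and the state
* set to CLEAN; a failed write flips it to ERROR from the endio
* callback.
*
* Returns 0 on success or a negative errno.
*/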
int
lbd_flush(struct lbd* lbd)
{
int ret = 0;
u32 c_len;
u32 elem_len;
u8* p;
u32 n;
u64 pblk;
struct page* iopagev[1];
mutex_lock(&lbd->lock);
if (lbd->state != CACHE_STATE_DIRTY) {
if (lbd->state == CACHE_STATE_ERROR) {
ret = -EIO;
goto out;
}
goto clean;
}
if (lblk_is_zeros(lbd->params, lbd)) {
c_len = 0;
elem_len = 0;
p = NULL;
}
else {
c_len = lblk_compress(lbd);
if (c_len > 0) {
size_t c_blkrem = c_len % PBLK_SIZE;
if (c_blkrem) {
/* Zero-pad the compressed data out to the pblk boundary */
memset(lbd->lz4_cbuf + c_len, 0, PBLK_SIZE - c_blkrem);
}
elem_len = c_len;
p = lbd->lz4_cbuf;
}
else {
c_len = PBLK_SIZE * lblk_per_pblk(lbd->params);
elem_len = CBD_UNCOMPRESSED;
p = lbd->buf;
}
}
ret = lbatview_elem_realloc(lbd->lv, lbd->lblk, elem_len);
if (ret) {
goto out;
}
for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
pblk = lbatview_elem_pblk(lbd->lv, lbd->lblk, n);
if (pblk == PBLK_NONE) {
ret = -EIO;
goto out;
}
iopagev[0] = cbd_alloc_page();
if (!iopagev[0]) {
printk(KERN_ERR "%s: out of memory\n", __func__);
ret = -ENOMEM;
goto out;
}
memcpy(page_address(iopagev[0]), p, PBLK_SIZE);
pblk_write(lbd->params, pblk, 1, iopagev, lbd_flush_endio, lbd);
}
clean:
ret = lbatviewcache_put(lbd->lvc, lbd->lv);
lbd->lv = NULL;
if (ret) {
lbd->state = CACHE_STATE_ERROR;
goto out;
}
lbd->state = CACHE_STATE_CLEAN;
out:
mutex_unlock(&lbd->lock);
return ret;
}
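/*
* Populate an uncached lbd from disk: look up the element length, read
* the backing physical blocks, and decompress (or copy) the result
* into lbd->buf. A zero-length element yields an all-zeros logical
* block.
*
* Returns 0 on success or a negative errno.
*/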
int
lbd_read(struct lbd* lbd)
{
int ret = 0;
u32 c_len;
u64 pblk;
struct page* iopagev[1];
mutex_lock(&lbd->lock);
if (lbd->state != CACHE_STATE_UNCACHED) {
goto out;
}
ret = lbatview_read(lbd->lv);
if (ret) {
goto out;
}
c_len = lbatview_elem_len(lbd->lv, lbd->lblk);
if (c_len == 0) {
memset(lbd->buf, 0, PBLK_SIZE * lblk_per_pblk(lbd->params));
}
else {
bool is_compressed = true;
u32 d_len = PBLK_SIZE * lblk_per_pblk(lbd->params);
u32 n;
u8* p;
if (c_len == CBD_UNCOMPRESSED) {
is_compressed = false;
c_len = d_len;
}
p = lbd->lz4_cbuf;
for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
pblk = lbatview_elem_pblk(lbd->lv, lbd->lblk, n);
if (pblk == PBLK_NONE) {
ret = -EIO;
goto out;
}
/* XXX: check pblk not in metadata? */
iopagev[0] = virt_to_page(p);
ret = pblk_read_wait(lbd->params, pblk, 1, iopagev);
if (ret) {
goto out;
}
}
if (is_compressed) {
if (lblk_decompress(lbd, c_len) != 0) {
printk(KERN_ERR " decompress failed\n");
ret = -EIO;
goto out;
}
}
else {
memcpy(lbd->buf, lbd->lz4_cbuf, d_len);
}
}
lbd->state = CACHE_STATE_CLEAN;
out:
mutex_unlock(&lbd->lock);
return ret;
}
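/*
* Retarget the lbd at a new logical block: acquire the matching
* lbatview and mark the data uncached. Called with reflock held.
*/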
bool
lbd_reset(struct lbd* lbd, u64 lblk)
{
if (lbd->lv) { printk(KERN_ERR "%s: lbatview leak\n", __func__); }
lbd->lv = lbatviewcache_get(lbd->lvc, lblk);
if (!lbd->lv) {
printk(KERN_ERR "%s: lbatviewcache_get failed\n", __func__);
return false;
}
lbd->lblk = lblk;
lbd->state = CACHE_STATE_UNCACHED;
return true;
}
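/*
* Copy len bytes at byte offset off out of the cached logical block.
*/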
void
lbd_data_read(struct lbd* lbd, u32 off, u32 len, u8* buf)
{
/* XXX: convert to BUG_ON */
if (off + len > PBLK_SIZE * lblk_per_pblk(lbd->params)) {
printk(KERN_ERR "%s: out of bounds\n", __func__);
return;
}
mutex_lock(&lbd->lock);
BUG_ON(lbd->state == CACHE_STATE_UNCACHED);
memcpy(buf, lbd->buf + off, len);
mutex_unlock(&lbd->lock);
}
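/*
* Copy len bytes at byte offset off into the cached logical block and
* mark it dirty.
*/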
void
lbd_data_write(struct lbd* lbd, u32 off, u32 len, const u8* buf)
{
/* XXX: convert to BUG_ON */
if (off + len > PBLK_SIZE * lblk_per_pblk(lbd->params)) {
printk(KERN_ERR "%s: out of bounds\n", __func__);
return;
}
mutex_lock(&lbd->lock);
BUG_ON(lbd->state == CACHE_STATE_UNCACHED);
memcpy(lbd->buf + off, buf, len);
lbd->state = CACHE_STATE_DIRTY;
mutex_unlock(&lbd->lock);
}
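/*
* A pool of lbd objects indexed by logical block number. The pool
* starts at a fixed size and doubles when every object is in use.
*/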
struct lbdcache
{
struct mutex lock;
struct cbd_params* params;
struct lbatviewcache* lvc;
unsigned int len;
struct lbd** cache;
};
size_t
lbdcache_size(void)
{
return sizeof(struct lbdcache);
}
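/*
* Grow the object array to len entries, constructing each new lbd.
* Existing entries are carried over. Returns false on allocation
* failure; entries already constructed remain in the array so that
* lbdcache_dtr() can release them.
*/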
static bool
lbdcache_realloc(struct lbdcache* lc, unsigned int len)
{
struct lbd** cache;
unsigned int n;
struct lbd* lbd;
cache = kcalloc(len, sizeof(struct lbd*), GFP_KERNEL);
if (!cache) {
return false;
}
n = 0;
if (lc->len) {
memcpy(cache, lc->cache, lc->len * sizeof(struct lbd*));
n = lc->len;
kfree(lc->cache);
}
lc->len = len;
lc->cache = cache;
while (n < len) {
lbd = kmalloc(sizeof(struct lbd), GFP_KERNEL);
if (!lbd) {
return false;
}
cache[n++] = lbd;
if (!lbd_ctr(lbd, lc->params, lc->lvc)) {
return false;
}
}
return true;
}
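/*
* Initialize the cache: construct the shared lbatview cache, then
* pre-allocate an initial pool of 1024 lbd objects.
*/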
bool
lbdcache_ctr(struct lbdcache* lc,
struct cbd_params* params)
{
memset(lc, 0, sizeof(struct lbdcache));
mutex_init(&lc->lock);
lc->params = params;
lc->lvc = kzalloc(lbatviewcache_size(), GFP_KERNEL);
if (!lc->lvc) {
return false;
}
if (!lbatviewcache_ctr(lc->lvc, params)) {
return false;
}
return lbdcache_realloc(lc, 1024);
}
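/*
* Tear down the cache: destruct every lbd (reporting any leaked
* references) and release the lbatview cache.
*/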
void
lbdcache_dtr(struct lbdcache* lc)
{
unsigned int n;
struct lbd* lbd;
for (n = 0; n < lc->len; ++n) {
lbd = lc->cache[n];
if (!lbd) {
continue;
}
lbd_dtr(lbd);
if (lbd->ref) {
printk(KERN_ERR "%s: lbd ref leak: n=%u ref=%u\n", __func__, n, lbd->ref);
}
kfree(lbd);
}
kfree(lc->cache);
lc->cache = NULL;
lc->len = 0;
lbatviewcache_dtr(lc->lvc);
kfree(lc->lvc);
lc->lvc = NULL;
lc->params = NULL;
}
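/*
* Take a reference on an lbd for lblk. Three passes over the pool:
* first an object already bound to lblk, then a never-used object,
* then any idle object not in the ERROR state. If all objects are
* busy, the pool is doubled. Note that an idle object is rebound via
* lbd_reset() even when it already holds lblk, so its cached data is
* re-read rather than reused.
*/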
struct lbd*
lbdcache_get(struct lbdcache* lc, u64 lblk)
{
unsigned int n;
struct lbd* lbd;
mutex_lock(&lc->lock);
for (n = 0; n < lc->len; ++n) {
lbd = lc->cache[n];
mutex_lock(&lbd->reflock);
if (lbd->lblk == lblk) {
if (lbd->ref == 0) {
goto found;
}
++lbd->ref;
mutex_unlock(&lbd->reflock);
goto out;
}
mutex_unlock(&lbd->reflock);
}
for (n = 0; n < lc->len; ++n) {
lbd = lc->cache[n];
mutex_lock(&lbd->reflock);
if (lbd->lblk == LBLK_NONE) {
goto found;
}
mutex_unlock(&lbd->reflock);
}
for (n = 0; n < lc->len; ++n) {
lbd = lc->cache[n];
mutex_lock(&lbd->reflock);
if (lbd->ref == 0 && lbd->state != CACHE_STATE_ERROR) {
goto found;
}
mutex_unlock(&lbd->reflock);
}
printk(KERN_INFO "%s: all objects in use, realloc...\n", __func__);
n = lc->len;
if (!lbdcache_realloc(lc, lc->len * 2)) {
printk(KERN_ERR "%s: realloc failed\n", __func__);
lbd = NULL;
goto out;
}
printk(KERN_INFO "%s: realloc done, using n=%u\n", __func__, n);
lbd = lc->cache[n];
mutex_lock(&lbd->reflock);
found:
if (!lbd_reset(lbd, lblk)) {
mutex_unlock(&lbd->reflock);
printk(KERN_ERR "%s: lbd_reset failed\n", __func__);
lbd = NULL;
goto out;
}
lbd->ref = 1;
mutex_unlock(&lbd->reflock);
out:
mutex_unlock(&lc->lock);
return lbd;
}
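/*
* Drop a reference on an lbd. The last put flushes any dirty data
* back to disk.
*/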
int
lbdcache_put(struct lbdcache* lc, struct lbd* lbd)
{
int ret = 0;
if (!lbd) {
return 0;
}
mutex_lock(&lc->lock);
mutex_lock(&lbd->reflock);
if (--lbd->ref == 0) {
ret = lbd_flush(lbd);
if (ret) {
printk(KERN_ERR "%s: lbd_flush failed\n", __func__);
}
}
mutex_unlock(&lbd->reflock);
mutex_unlock(&lc->lock);
return ret;
}