WIP: Implement zone caches
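
Replace the single set of per-device buffers in struct dm_compress with a
small pool of struct zone_cache entries, one full set of working state
(lz4 buffers, pblk/lblk allocation state, lblk data) per cached zone. The
pool is sized to min(2 * num_online_cpus(), nr_zones) and guarded by
zc_lock; each entry carries its own mutex. zone_cache_get() returns a
locked entry (existing, unused, or an idle one reclaimed via trylock) and
zone_cache_put() flushes and unlocks it. The global io_lock is dropped, so
requests that touch different zones no longer serialize against each other.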

Author: Tom Marshall
Date:   2019-10-10 15:07:39 -07:00
parent 07f4221a5f
commit 24401f9a68

1 changed file with 312 additions and 179 deletions


@@ -50,13 +50,11 @@ struct dm_compress_io {
     struct work_struct work;
 };
 
-struct dm_compress
-{
-    struct dm_dev* dev;
-    bool io_failed;
-    struct cbd_params params;
+struct zone_cache {
+    u32 zone;
+    struct mutex lock;
 
     /* Compression working memory */
     u8* lz4_wrkmem;
     u8* lz4_cbuf;
@@ -78,11 +76,26 @@ struct dm_compress
     void* lblk;
+};
+
+struct dm_compress
+{
+    struct dm_dev* dev;
+    bool io_failed;
+    struct cbd_params params;
+
+    struct mutex zc_lock;
+    unsigned int nr_zc;
+    struct zone_cache* zcache;
 
     /* Queueing stuff */
     struct workqueue_struct* io_queue;
-    struct mutex io_lock;
 };
 
+/* Forward decls */
+static struct zone_cache* zone_cache_get(struct dm_compress*, u32);
+static int zone_cache_put(struct dm_compress*, struct zone_cache*);
+
 static inline int
 memcmpz(const void* buf, size_t len)
 {
@@ -252,68 +265,65 @@ blkdev_pblk_write(struct dm_compress* dc, u64 pblk, u32 count, void *data)
 **************************************/
 
 static int
-pblk_alloc_write(struct dm_compress* dc)
+pblk_alloc_write(struct dm_compress* dc, struct zone_cache* zc)
 {
     u64 pblk;
     u32 count;
     void* pg;
 
-    BUG_ON(dc->pblk_alloc_idx == ZONE_NONE);
-    pblk = pblk_alloc_off(&dc->params, dc->pblk_alloc_idx);
+    BUG_ON(zc->pblk_alloc_idx == ZONE_NONE);
+    pblk = pblk_alloc_off(&dc->params, zc->pblk_alloc_idx);
     count = pblk_alloc_len(&dc->params);
     pg = compress_alloc_pages(PBLK_SIZE);
     if (!pg) {
         return -ENOMEM;
     }
-    memcpy(pg, dc->pblk_alloc, count * PBLK_SIZE);
+    memcpy(pg, zc->pblk_alloc, count * PBLK_SIZE);
     blkdev_pblk_write(dc, pblk, count, pg);
-    dc->pblk_alloc_dirty = false;
+    zc->pblk_alloc_dirty = false;
 
     return 0;
 }
 
 static int
-pblk_alloc_flush(struct dm_compress* dc)
+pblk_alloc_flush(struct dm_compress* dc, struct zone_cache* zc)
 {
-    int ret;
+    int ret = 0;
 
-    if (dc->pblk_alloc_dirty) {
-        ret = pblk_alloc_write(dc);
-        if (ret) {
-            return ret;
-        }
-    }
+    if (zc->pblk_alloc_dirty) {
+        ret = pblk_alloc_write(dc, zc);
+    }
 
-    return 0;
+    return ret;
 }
 
 /* Read zone physical block alloc bitmap */
 static int
-pblk_alloc_read(struct dm_compress* dc, u32 idx)
+pblk_alloc_read(struct dm_compress* dc, struct zone_cache* zc)
 {
     int ret;
     u64 pblk;
     u32 count;
 
-    if (dc->pblk_alloc_idx == idx) {
+    if (zc->pblk_alloc_idx == zc->zone) {
         return 0;
     }
-    ret = pblk_alloc_flush(dc);
-    if (ret != 0) {
-        return ret;
-    }
-    pblk = pblk_alloc_off(&dc->params, idx);
-    count = pblk_alloc_len(&dc->params);
-    ret = blkdev_pblk_read(dc, pblk, count, dc->pblk_alloc);
+    ret = pblk_alloc_flush(dc, zc);
     if (ret) {
         return ret;
     }
-    dc->pblk_alloc_idx = idx;
+    pblk = pblk_alloc_off(&dc->params, zc->zone);
+    count = pblk_alloc_len(&dc->params);
+    ret = blkdev_pblk_read(dc, pblk, count, zc->pblk_alloc);
+    if (ret) {
+        return ret;
+    }
+    zc->pblk_alloc_idx = zc->zone;
 
     return 0;
 }
@@ -323,30 +333,36 @@ pblk_alloc_read(struct dm_compress* dc, u32 idx)
  * XXX: get rid of this function and use pblk_alloc directly in lblk_write().
  */
 static u64
-pblk_alloc_get(struct dm_compress* dc, u32 zone_hint)
+pblk_alloc_get(struct dm_compress* dc, struct zone_cache* zc_hint)
 {
     u32 zone_pblk_count = pblk_alloc_len(&dc->params) * PBLK_SIZE_BITS;
+    struct zone_cache* zc;
     u32 zone;
     u32 idx;
 
-    zone = zone_hint;
+    zc = zc_hint;
+    zone = zc->zone;
     /* XXX: check both forward and backward */
     do {
-        if (pblk_alloc_read(dc, zone) != 0) {
+        if (pblk_alloc_read(dc, zc) != 0) {
             printk(KERN_ERR " pblk_alloc_read failed\n");
             return 0;
         }
-        idx = cbd_bitmap_alloc(dc->pblk_alloc, zone_pblk_count);
+        idx = cbd_bitmap_alloc(zc->pblk_alloc, zone_pblk_count);
         if (idx != zone_pblk_count) {
-            dc->pblk_alloc_dirty = true;
+            zc->pblk_alloc_dirty = true;
+            if (zc != zc_hint) {
+                zone_cache_put(dc, zc);
+            }
             return zone_data_off(&dc->params, zone) + idx;
         }
         ++zone;
         if (zone == dc->params.nr_zones) {
             zone = 0;
         }
+        zc = zone_cache_get(dc, zone);
     }
-    while (zone != zone_hint);
+    while (zc != zc_hint);
     printk(KERN_ERR "%s: fail, all zones full\n", __func__);
 
     return 0;
@@ -357,9 +373,10 @@ pblk_alloc_get(struct dm_compress* dc, u32 zone_hint)
  * XXX: get rid of this function and use pblk_free directly in lblk_write().
  */
 static int
-pblk_alloc_put(struct dm_compress* dc, u64 pblk)
+pblk_alloc_put(struct dm_compress* dc, struct zone_cache* zc, u64 pblk)
 {
     u32 zone_pblk_count = pblk_alloc_len(&dc->params) * PBLK_SIZE_BITS;
+    bool put_zone = false;
     u32 zone;
     u32 idx;
     int ret;
@@ -382,14 +399,25 @@ pblk_alloc_put(struct dm_compress* dc, u64 pblk)
         printk(KERN_ERR "%s: pblk index out of bounds\n", __func__);
         return -EINVAL;
     }
-    ret = pblk_alloc_read(dc, zone);
-    if (ret != 0) {
-        return ret;
-    }
-    cbd_bitmap_free(dc->pblk_alloc, idx);
-    dc->pblk_alloc_dirty = true;
-
-    return 0;
+    if (zone != zc->zone) {
+        zc = zone_cache_get(dc, zone);
+        put_zone = true;
+    }
+    ret = pblk_alloc_read(dc, zc);
+    if (ret) {
+        goto out_put;
+    }
+    cbd_bitmap_free(zc->pblk_alloc, idx);
+    zc->pblk_alloc_dirty = true;
+
+out_put:
+    if (put_zone) {
+        zone_cache_put(dc, zc);
+    }
+    return ret;
 }
 
 /**************************************
@@ -397,7 +425,7 @@ pblk_alloc_put(struct dm_compress* dc, u64 pblk)
 **************************************/
 
 static int
-lblk_alloc_elem_write(struct dm_compress* dc)
+lblk_alloc_elem_write(struct dm_compress* dc, struct zone_cache* zc)
 {
     u32 zone;
     u32 zone_lblk;
@@ -409,32 +437,32 @@ lblk_alloc_elem_write(struct dm_compress* dc)
     u8* buf;
     void* pg;
 
-    BUG_ON(dc->lblk_alloc_elem_lblk == LBLK_NONE);
-    BUG_ON(dc->lblk_alloc_pblk == PBLK_NONE);
-    BUG_ON(dc->lblk_alloc_len == 0);
+    BUG_ON(zc->lblk_alloc_elem_lblk == LBLK_NONE);
+    BUG_ON(zc->lblk_alloc_pblk == PBLK_NONE);
+    BUG_ON(zc->lblk_alloc_len == 0);
 
-    zone = dc->lblk_alloc_elem_lblk / dc->params.lblk_per_zone;
-    zone_lblk = dc->lblk_alloc_elem_lblk - (zone * dc->params.lblk_per_zone);
+    zone = zc->lblk_alloc_elem_lblk / dc->params.lblk_per_zone;
+    zone_lblk = zc->lblk_alloc_elem_lblk - (zc->zone * dc->params.lblk_per_zone);
     elem_off = lblk_alloc_elem_len(&dc->params) * zone_lblk;
     elem_end = elem_off + lblk_alloc_elem_len(&dc->params);
     rel_pblk = elem_off / PBLK_SIZE;
 
-    count = dc->lblk_alloc_len;
-    pblk = dc->lblk_alloc_pblk;
-    buf = dc->lblk_alloc + (elem_off - rel_pblk * PBLK_SIZE);
-    lblk_alloc_elem_put(&dc->params, buf, dc->lblk_alloc_elem);
+    count = zc->lblk_alloc_len;
+    pblk = zc->lblk_alloc_pblk;
+    buf = zc->lblk_alloc + (elem_off - rel_pblk * PBLK_SIZE);
+    lblk_alloc_elem_put(&dc->params, buf, zc->lblk_alloc_elem);
 
     pg = compress_alloc_pages(count * PBLK_SIZE);
     if (!pg) {
         return -ENOMEM;
     }
-    memcpy(pg, dc->lblk_alloc, count * PBLK_SIZE);
+    memcpy(pg, zc->lblk_alloc, count * PBLK_SIZE);
     blkdev_pblk_write(dc, pblk, count, pg);
 
     return 0;
 }
 
 static int
-lblk_alloc_elem_read(struct dm_compress* dc, u64 lblk)
+lblk_alloc_elem_read(struct dm_compress* dc, struct zone_cache* zc, u64 lblk)
 {
     int ret;
     u32 zone;
@@ -446,7 +474,7 @@ lblk_alloc_elem_read(struct dm_compress* dc, u64 lblk)
     u64 pblk;
     u8* buf;
 
-    if (dc->lblk_alloc_elem_lblk == lblk) {
+    if (zc->lblk_alloc_elem_lblk == lblk) {
         return 0;
     }
 
@@ -457,17 +485,17 @@ lblk_alloc_elem_read(struct dm_compress* dc, u64 lblk)
     rel_pblk = elem_off / PBLK_SIZE;
     count = 1 + (elem_end - 1) / PBLK_SIZE - (elem_off / PBLK_SIZE);
     pblk = lblk_alloc_off(&dc->params, zone) + rel_pblk;
-    if (dc->lblk_alloc_pblk != pblk || dc->lblk_alloc_len < count) {
-        ret = blkdev_pblk_read(dc, pblk, count, dc->lblk_alloc);
+    if (zc->lblk_alloc_pblk != pblk || zc->lblk_alloc_len < count) {
+        ret = blkdev_pblk_read(dc, pblk, count, zc->lblk_alloc);
         if (ret != 0) {
             return ret;
         }
-        dc->lblk_alloc_pblk = pblk;
-        dc->lblk_alloc_len = count;
+        zc->lblk_alloc_pblk = pblk;
+        zc->lblk_alloc_len = count;
     }
-    buf = dc->lblk_alloc + (elem_off - rel_pblk * PBLK_SIZE);
-    lblk_alloc_elem_get(&dc->params, buf, dc->lblk_alloc_elem);
-    dc->lblk_alloc_elem_lblk = lblk;
+    buf = zc->lblk_alloc + (elem_off - rel_pblk * PBLK_SIZE);
+    lblk_alloc_elem_get(&dc->params, buf, zc->lblk_alloc_elem);
+    zc->lblk_alloc_elem_lblk = lblk;
 
     return 0;
 }
@@ -482,15 +510,15 @@ lblk_alloc_elem_read(struct dm_compress* dc, u64 lblk)
  * Returns number of bytes in cbuf or 0 for failure.
  */
 static size_t
-lblk_compress(struct dm_compress* dc)
+lblk_compress(struct cbd_params* params, struct zone_cache* zc)
 {
     int ret;
-    void *dbuf = dc->lblk;
-    u32 dlen = PBLK_SIZE * lblk_per_pblk(&dc->params);
-    void *cbuf = dc->lz4_cbuf;
-    u32 clen = PBLK_SIZE * lblk_per_pblk(&dc->params);
+    void *dbuf = zc->lblk;
+    u32 dlen = PBLK_SIZE * lblk_per_pblk(params);
+    void *cbuf = zc->lz4_cbuf;
+    u32 clen = PBLK_SIZE * lblk_per_pblk(params);
 
-    ret = LZ4_compress_default(dbuf, cbuf, dlen, clen, dc->lz4_wrkmem);
+    ret = LZ4_compress_default(dbuf, cbuf, dlen, clen, zc->lz4_wrkmem);
     if (ret <= 0) {
         return 0;
     }
@@ -504,12 +532,12 @@ lblk_compress(struct dm_compress* dc)
  * Returns 0 for success, <0 for failure.
  */
 static int
-lblk_decompress(struct dm_compress* dc, u32 clen)
+lblk_decompress(struct cbd_params* params, struct zone_cache* zc, u32 clen)
 {
     int ret;
-    void *cbuf = dc->lz4_cbuf;
-    void *dbuf = dc->lblk;
-    u32 dlen = PBLK_SIZE * lblk_per_pblk(&dc->params);
+    void *cbuf = zc->lz4_cbuf;
+    void *dbuf = zc->lblk;
+    u32 dlen = PBLK_SIZE * lblk_per_pblk(params);
 
     ret = LZ4_decompress_safe(cbuf, dbuf, clen, dlen);
     if (ret != dlen) {
@@ -521,7 +549,7 @@ lblk_decompress(struct dm_compress* dc, u32 clen)
 }
 
 static int
-lblk_write(struct dm_compress* dc)
+lblk_write(struct dm_compress* dc, struct zone_cache* zc)
 {
     int ret;
     u32 zone;
@@ -533,51 +561,51 @@ lblk_write(struct dm_compress* dc)
     u32 n;
     u64 pblk;
 
-    zone = dc->lblk_num / dc->params.lblk_per_zone;
-    zone_lblk = dc->lblk_num - (zone * dc->params.lblk_per_zone);
-    elem_buf = dc->lblk_alloc + zone_lblk * lblk_alloc_elem_len(&dc->params);
+    zone = zc->zone;
+    zone_lblk = zc->lblk_num - (zone * dc->params.lblk_per_zone);
+    elem_buf = zc->lblk_alloc + zone_lblk * lblk_alloc_elem_len(&dc->params);
 
     /* We must have a cached lblk elem */
-    BUG_ON(dc->lblk_alloc_elem_lblk == LBLK_NONE);
+    BUG_ON(zc->lblk_alloc_elem_lblk == LBLK_NONE);
 
     d_len = PBLK_SIZE * lblk_per_pblk(&dc->params);
 #ifdef CBD_DETECT_ZERO_BLOCKS
-    if (memcmpz(dc->lblk, d_len) == 0) {
+    if (memcmpz(zc->lblk, d_len) == 0) {
 #else
     if (0) {
 #endif
         c_len = 0;
         c_buf = NULL;
-        dc->lblk_alloc_elem->len = 0;
+        zc->lblk_alloc_elem->len = 0;
     }
     else {
-        c_len = lblk_compress(dc);
+        c_len = lblk_compress(&dc->params, zc);
         if (c_len > 0) {
             size_t c_blkrem = c_len % PBLK_SIZE;
             if (c_blkrem) {
-                memset(dc->lz4_cbuf + c_len, 0, c_blkrem);
+                memset(zc->lz4_cbuf + c_len, 0, c_blkrem);
             }
-            c_buf = dc->lz4_cbuf;
-            dc->lblk_alloc_elem->len = c_len;
+            c_buf = zc->lz4_cbuf;
+            zc->lblk_alloc_elem->len = c_len;
         }
         else {
             c_len = d_len;
-            c_buf = dc->lblk;
-            dc->lblk_alloc_elem->len = CBD_UNCOMPRESSED;
+            c_buf = zc->lblk;
+            zc->lblk_alloc_elem->len = CBD_UNCOMPRESSED;
         }
     }
     for (n = 0; n < lblk_per_pblk(&dc->params); ++n) {
         if (c_len > PBLK_SIZE * n) {
             void* pg;
-            pblk = dc->lblk_alloc_elem->pblk[n];
+            pblk = zc->lblk_alloc_elem->pblk[n];
             if (!pblk) {
-                pblk = pblk_alloc_get(dc, zone);
+                pblk = pblk_alloc_get(dc, zc);
                 if (pblk == 0) {
                     printk(KERN_ERR " pblk_alloc_get failed\n");
                     return -ENOSPC;
                 }
-                dc->lblk_alloc_elem->pblk[n] = pblk;
+                zc->lblk_alloc_elem->pblk[n] = pblk;
             }
             pg = compress_alloc_pages(PBLK_SIZE);
             if (!pg) {
@@ -588,10 +616,10 @@ lblk_write(struct dm_compress* dc)
             c_buf += PBLK_SIZE;
         }
         else {
-            pblk = dc->lblk_alloc_elem->pblk[n];
+            pblk = zc->lblk_alloc_elem->pblk[n];
             if (pblk) {
-                dc->lblk_alloc_elem->pblk[n] = 0;
-                ret = pblk_alloc_put(dc, pblk);
+                zc->lblk_alloc_elem->pblk[n] = 0;
+                ret = pblk_alloc_put(dc, zc, pblk);
                 if (ret != 0) {
                     printk(KERN_ERR " pblk_alloc_put failed\n");
                     return ret;
@@ -600,29 +628,29 @@ lblk_write(struct dm_compress* dc)
         }
     }
 
-    ret = lblk_alloc_elem_write(dc);
+    ret = lblk_alloc_elem_write(dc, zc);
     if (ret != 0) {
         printk(KERN_ERR " lblk_alloc_elem_write failed\n");
         return ret;
     }
-    ret = pblk_alloc_flush(dc);
+    ret = pblk_alloc_flush(dc, zc);
     if (ret != 0) {
         printk(KERN_ERR " pblk_alloc_flush failed\n");
         return ret;
     }
-    dc->lblk_dirty = false;
+    zc->lblk_dirty = false;
 
     return 0;
 }
 
 static int
-lblk_flush(struct dm_compress* dc)
+lblk_flush(struct dm_compress* dc, struct zone_cache* zc)
 {
     int ret;
 
-    if (dc->lblk_dirty) {
-        ret = lblk_write(dc);
+    if (zc->lblk_dirty) {
+        ret = lblk_write(dc, zc);
         if (ret) {
             return ret;
         }
@@ -632,7 +660,7 @@ lblk_flush(struct dm_compress* dc)
 }
 
 static int
-lblk_read(struct dm_compress* dc, u64 idx)
+lblk_read(struct dm_compress* dc, struct zone_cache* zc, u64 idx)
 {
     int ret;
     u32 zone;
@@ -641,27 +669,27 @@ lblk_read(struct dm_compress* dc, u64 idx)
     u32 c_len;
     u64 pblk;
 
-    if (dc->lblk_num == idx) {
+    if (zc->lblk_num == idx) {
         return 0;
     }
-    ret = lblk_flush(dc);
+    ret = lblk_flush(dc, zc);
     if (ret) {
         return ret;
     }
 
     zone = idx / dc->params.lblk_per_zone;
     zone_lblk = idx - (zone * dc->params.lblk_per_zone);
-    elem_buf = dc->lblk_alloc + zone_lblk * lblk_alloc_elem_len(&dc->params);
+    elem_buf = zc->lblk_alloc + zone_lblk * lblk_alloc_elem_len(&dc->params);
 
-    ret = lblk_alloc_elem_read(dc, idx);
+    ret = lblk_alloc_elem_read(dc, zc, idx);
     if (ret != 0) {
         printk(KERN_ERR " lblk_alloc_elem_read failed\n");
         return ret;
     }
 
-    c_len = dc->lblk_alloc_elem->len;
+    c_len = zc->lblk_alloc_elem->len;
     if (c_len == 0) {
-        memset(dc->lblk, 0, PBLK_SIZE * lblk_per_pblk(&dc->params));
+        memset(zc->lblk, 0, PBLK_SIZE * lblk_per_pblk(&dc->params));
     }
     else {
         bool is_compressed = true;
@@ -673,9 +701,9 @@ lblk_read(struct dm_compress* dc, u64 idx)
             is_compressed = false;
             c_len = d_len;
         }
-        p = dc->lz4_cbuf;
+        p = zc->lz4_cbuf;
         for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
-            pblk = dc->lblk_alloc_elem->pblk[n];
+            pblk = zc->lblk_alloc_elem->pblk[n];
             BUG_ON(pblk == 0);
             ret = blkdev_pblk_read(dc, pblk, 1, p);
             if (ret != 0) {
@@ -683,89 +711,169 @@ lblk_read(struct dm_compress* dc, u64 idx)
             }
         }
         if (is_compressed) {
-            if (lblk_decompress(dc, c_len) != 0) {
+            if (lblk_decompress(&dc->params, zc, c_len) != 0) {
                 printk(KERN_ERR " decompress failed\n");
                 return -1;
             }
         }
        else {
-            memcpy(dc->lblk, dc->lz4_cbuf, d_len);
+            memcpy(zc->lblk, zc->lz4_cbuf, d_len);
        }
     }
-    dc->lblk_num = idx;
+    zc->lblk_num = idx;
 
     return 0;
 }
 
 /**************************************
- * Main functions
+ * Zone cache functions
 **************************************/
 
 static void
-compress_free_buffers(struct dm_compress* dc)
-{
-    compress_free_pages(dc->lblk, PBLK_SIZE * lblk_per_pblk(&dc->params));
-    dc->lblk = NULL;
-
-    kfree(dc->lblk_alloc_elem);
-    dc->lblk_alloc_elem = NULL;
-
-    compress_free_pages(dc->lblk_alloc, PBLK_SIZE * 2);
-    dc->lblk_alloc = NULL;
-
-    compress_free_pages(dc->pblk_alloc, PBLK_SIZE * pblk_alloc_len(&dc->params));
-    dc->pblk_alloc = NULL;
-
-    compress_free_pages(dc->lz4_cbuf, PBLK_SIZE * lblk_per_pblk(&dc->params));
-    dc->lz4_cbuf = NULL;
-
-    kfree(dc->lz4_wrkmem);
-    dc->lz4_wrkmem = NULL;
-}
+zone_cache_reset(struct zone_cache* zc, u32 zone)
+{
+    zc->zone = zone;
+    zc->pblk_alloc_idx = ZONE_NONE;
+    zc->pblk_alloc_dirty = false;
+    zc->lblk_alloc_pblk = PBLK_NONE;
+    zc->lblk_alloc_len = 0;
+    zc->lblk_alloc_elem_lblk = LBLK_NONE;
+    zc->lblk_num = LBLK_NONE;
+    zc->lblk_dirty = false;
+}
+
+static int
+zone_cache_flush(struct dm_compress* dc, struct zone_cache* zc)
+{
+    int ret;
+
+    ret = lblk_flush(dc, zc);
+    if (ret) {
+        return ret;
+    }
+    ret = pblk_alloc_flush(dc, zc);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+}
+
+static struct zone_cache*
+zone_cache_get(struct dm_compress* dc, u32 zone)
+{
+    struct zone_cache* zc;
+    u32 idx;
+
+    //printk(KERN_INFO "%s: zone=%u\n", __func__, (unsigned int)zone);
+
+    mutex_lock(&dc->zc_lock);
+    for (idx = 0; idx < dc->nr_zc; ++idx) {
+        zc = &dc->zcache[idx];
+        if (zc->zone == zone) {
+            mutex_lock(&zc->lock);
+            goto out;
+        }
+    }
+    for (idx = 0; idx < dc->nr_zc; ++idx) {
+        zc = &dc->zcache[idx];
+        if (zc->zone == ZONE_NONE) {
+            zone_cache_reset(zc, zone);
+            mutex_lock(&zc->lock);
+            goto out;
+        }
+    }
+    for (idx = 0; idx < dc->nr_zc; ++idx) {
+        zc = &dc->zcache[idx];
+        if (mutex_trylock(&zc->lock) == 1) {
+            zone_cache_reset(zc, zone);
+            goto out;
+        }
+    }
+    printk(KERN_ERR "%s: Cannot get zone %u\n", __func__, (unsigned int)zone);
+    zc = NULL;
+
+out:
+    mutex_unlock(&dc->zc_lock);
+    return zc;
+}
+
+static int
+zone_cache_put(struct dm_compress* dc, struct zone_cache* zc)
+{
+    int ret;
+
+    //printk(KERN_INFO "%s: zone=%u\n", __func__, (unsigned int)zc->zone);
+
+    ret = zone_cache_flush(dc, zc);
+    mutex_unlock(&zc->lock);
+
+    return ret;
+}
+
+static void
+zone_cache_dtr(struct dm_compress* dc, struct zone_cache* zc)
+{
+    compress_free_pages(zc->lblk, PBLK_SIZE * lblk_per_pblk(&dc->params));
+    zc->lblk = NULL;
+
+    kfree(zc->lblk_alloc_elem);
+    zc->lblk_alloc_elem = NULL;
+
+    compress_free_pages(zc->lblk_alloc, PBLK_SIZE * 2);
+    zc->lblk_alloc = NULL;
+
+    compress_free_pages(zc->pblk_alloc, PBLK_SIZE * pblk_alloc_len(&dc->params));
+    zc->pblk_alloc = NULL;
+
+    compress_free_pages(zc->lz4_cbuf, PBLK_SIZE * lblk_per_pblk(&dc->params));
+    zc->lz4_cbuf = NULL;
+
+    kfree(zc->lz4_wrkmem);
+    zc->lz4_wrkmem = NULL;
+}
 
-/*
- * XXX: Many of the below (all except lz4 buffers) are used in bio operations
- * and should be page aligned. We always get page aligned buffers because of
- * the way kmalloc() works, but that is technically not guaranteed.
- */
 static int
-compress_alloc_buffers(struct dm_compress* dc)
+zone_cache_ctr(struct dm_compress* dc, struct zone_cache* zc)
 {
-    dc->lz4_wrkmem = kmalloc(LZ4_compressBound(PBLK_SIZE * lblk_per_pblk(&dc->params)), GFP_KERNEL);
-    if (!dc->lz4_wrkmem) {
+    zc->zone = ZONE_NONE;
+    mutex_init(&zc->lock);
+
+    zc->lz4_wrkmem = kmalloc(LZ4_compressBound(PBLK_SIZE * lblk_per_pblk(&dc->params)), GFP_KERNEL);
+    if (!zc->lz4_wrkmem) {
         printk(KERN_ERR "%s: Failed to alloc lz4_wrkmem\n", __func__);
         goto out_nomem;
     }
-    dc->lz4_cbuf = compress_alloc_pages(PBLK_SIZE * lblk_per_pblk(&dc->params));
-    if (!dc->lz4_cbuf) {
+    zc->lz4_cbuf = compress_alloc_pages(PBLK_SIZE * lblk_per_pblk(&dc->params));
+    if (!zc->lz4_cbuf) {
         printk(KERN_ERR "%s: Failed to alloc lz4_cmem\n", __func__);
         goto out_nomem;
     }
-    dc->pblk_alloc_idx = ZONE_NONE;
-    dc->pblk_alloc_dirty = false;
-    dc->pblk_alloc = compress_alloc_pages(PBLK_SIZE * pblk_alloc_len(&dc->params));
-    if (!dc->pblk_alloc) {
+    zc->pblk_alloc_idx = ZONE_NONE;
+    zc->pblk_alloc_dirty = false;
+    zc->pblk_alloc = compress_alloc_pages(PBLK_SIZE * pblk_alloc_len(&dc->params));
+    if (!zc->pblk_alloc) {
         printk(KERN_ERR "%s: Failed to alloc pblk_alloc\n", __func__);
         goto out_nomem;
     }
-    dc->lblk_alloc_pblk = PBLK_NONE;
-    dc->lblk_alloc_len = 0;
-    dc->lblk_alloc = compress_alloc_pages(PBLK_SIZE * 2);
-    if (!dc->lblk_alloc) {
+    zc->lblk_alloc_pblk = PBLK_NONE;
+    zc->lblk_alloc_len = 0;
+    zc->lblk_alloc = compress_alloc_pages(PBLK_SIZE * 2);
+    if (!zc->lblk_alloc) {
        printk(KERN_ERR "%s: Failed to alloc lblk_alloc\n", __func__);
        goto out_nomem;
     }
-    dc->lblk_alloc_elem_lblk = LBLK_NONE;
-    dc->lblk_alloc_elem = kmalloc(offsetof(struct lblk_alloc_elem, pblk[lblk_per_pblk(&dc->params)]), GFP_KERNEL);
-    if (!dc->lblk_alloc_elem) {
+    zc->lblk_alloc_elem_lblk = LBLK_NONE;
+    zc->lblk_alloc_elem = kmalloc(offsetof(struct lblk_alloc_elem, pblk[lblk_per_pblk(&dc->params)]), GFP_KERNEL);
+    if (!zc->lblk_alloc_elem) {
         printk(KERN_ERR "%s: Failed to alloc lblk_alloc_elem\n", __func__);
         goto out_nomem;
     }
-    dc->lblk_num = LBLK_NONE;
-    dc->lblk_dirty = false;
-    dc->lblk = compress_alloc_pages(PBLK_SIZE * lblk_per_pblk(&dc->params));
-    if (!dc->lblk) {
+    zc->lblk_num = LBLK_NONE;
+    zc->lblk_dirty = false;
+    zc->lblk = compress_alloc_pages(PBLK_SIZE * lblk_per_pblk(&dc->params));
+    if (!zc->lblk) {
         printk(KERN_ERR "%s: Failed to alloc lblk\n", __func__);
         goto out_nomem;
     }
@@ -773,10 +881,14 @@ compress_alloc_buffers(struct dm_compress* dc)
 
     return 0;
 
 out_nomem:
-    compress_free_buffers(dc);
+    zone_cache_dtr(dc, zc);
     return -ENOMEM;
 }
 
+/**************************************
+ * Main functions
+ **************************************/
+
 static int
 compress_open(struct dm_compress* dc, u64 dev_nr_pblks)
 {
@@ -784,6 +896,7 @@ compress_open(struct dm_compress* dc, u64 dev_nr_pblks)
     u8 *pblkbuf;
     struct cbd_header header;
     u64 max_nr_zones;
+    unsigned int n;
 
     pblkbuf = kmalloc(PBLK_SIZE, GFP_KERNEL);
     if (!pblkbuf) {
@@ -846,22 +959,30 @@ compress_open(struct dm_compress* dc, u64 dev_nr_pblks)
 
     memcpy(&dc->params, &header.params, sizeof(header.params));
 
-    err = compress_alloc_buffers(dc);
-    if (err) {
-        printk(KERN_ERR "%s: failed to alloc buffers\n", __func__);
+    mutex_init(&dc->zc_lock);
+    dc->nr_zc = min(2 * num_online_cpus(), dc->params.nr_zones);
+    dc->zcache = kmalloc(dc->nr_zc * sizeof(struct zone_cache), GFP_KERNEL);
+    if (!dc->zcache) {
+        printk(KERN_ERR "%s: out of memory\n", __func__);
         goto out;
     }
+    for (n = 0; n < dc->nr_zc; ++n) {
+        err = zone_cache_ctr(dc, &dc->zcache[n]);
+        if (err) {
+            printk(KERN_ERR "%s: failed to init zone cache\n", __func__);
+            goto out;
+        }
+    }
 
     dc->io_queue = alloc_workqueue("kcompress_io", WQ_HIGHPRI | WQ_MEM_RECLAIM, 1);
     if (!dc->io_queue) {
         printk(KERN_ERR "%s: failed to alloc io_queue\n", __func__);
-        compress_free_buffers(dc);
-        return -ENOMEM;
+        err = -ENOMEM;
+        goto out;
     }
 
-    mutex_init(&dc->io_lock);
-
 out:
+    /* XXX: cleanup on error */
     kfree(pblkbuf);
 
     return err;
@@ -879,22 +1000,29 @@ compress_read(struct dm_compress *dc, struct bio *bio)
     bio_for_each_segment(bv, bio, iter) {
         sector_t lblk = iter.bi_sector / lblk_per_sector;
         u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
+        u32 zone = lblk / dc->params.lblk_per_zone;
+        struct zone_cache* zc = NULL;
         unsigned long flags;
         char* data;
 
+        zc = zone_cache_get(dc, zone);
+
         /* Ensure the data is within the logical block */
         if (lblk_off + bv.bv_len > lblk_len) {
             printk(KERN_ERR "%s: logical block bounds exceeded\n", __func__);
             return -EIO;
         }
         /* BUG_ON(lblk_off + bv.bv_offset + bv.bv_len > PBLK_SIZE + lblk_per_pblk(dc)); */
 
-        ret = lblk_read(dc, lblk);
+        ret = lblk_read(dc, zc, lblk);
         if (ret) {
+            zone_cache_put(dc, zc);
             return ret;
         }
 
         data = bvec_kmap_irq(&bv, &flags);
-        memcpy(data, dc->lblk + lblk_off, bv.bv_len);
+        memcpy(data, zc->lblk + lblk_off, bv.bv_len);
         bvec_kunmap_irq(data, &flags);
+
+        zone_cache_put(dc, zc);
     }
 
     return 0;
@@ -912,9 +1040,13 @@ compress_write(struct dm_compress *dc, struct bio *bio)
     bio_for_each_segment(bv, bio, iter) {
         sector_t lblk = iter.bi_sector / lblk_per_sector;
         u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
+        u32 zone = lblk / dc->params.lblk_per_zone;
+        struct zone_cache* zc = NULL;
         unsigned long flags;
         char* data;
 
+        zc = zone_cache_get(dc, zone);
+
         /* Ensure the data is within the logical block */
         if (lblk_off + bv.bv_len > lblk_len) {
             printk(KERN_ERR "%s logical block bounds exceeded\n", __func__);
@@ -924,18 +1056,17 @@ compress_write(struct dm_compress *dc, struct bio *bio)
             return -EIO;
         }
         /* BUG_ON(lblk_off + bv.bv_offset + bv.bv_len > PBLK_SIZE + lblk_per_pblk(dc)); */
 
-        ret = lblk_read(dc, lblk);
+        ret = lblk_read(dc, zc, lblk);
         if (ret) {
+            zone_cache_put(dc, zc);
             return ret;
         }
 
         data = bvec_kmap_irq(&bv, &flags);
-        memcpy(dc->lblk + lblk_off, data, bv.bv_len);
+        memcpy(zc->lblk + lblk_off, data, bv.bv_len);
         bvec_kunmap_irq(data, &flags);
-        dc->lblk_dirty = true;
-    }
+        zc->lblk_dirty = true;
 
-    ret = lblk_flush(dc);
-    if (ret) {
-        return ret;
+        zone_cache_put(dc, zc);
     }
 
     return 0;
@@ -947,8 +1078,6 @@ static void compress_io(struct dm_compress_io* io)
     struct dm_compress* dc = io->dc;
     struct bio* bio = io->bio;
 
-    mutex_lock(&dc->io_lock);
-
     switch (bio_op(bio)) {
     case REQ_OP_READ:
         ret = compress_read(dc, bio);
@@ -964,8 +1093,6 @@ static void compress_io(struct dm_compress_io* io)
         printk(KERN_ERR "%s: failed, ret=%d\n", __func__, ret);
     }
 
-    mutex_unlock(&dc->io_lock);
-
     bio->bi_status = (ret == 0 ? BLK_STS_OK : BLK_STS_IOERR); /* XXX */
     bio_endio(bio);
 }
@@ -1072,11 +1199,17 @@ static void
 compress_dtr(struct dm_target *ti)
 {
     struct dm_compress *dc;
+    unsigned int n;
 
     printk(KERN_INFO "%s: enter\n", __func__);
 
     dc = (struct dm_compress *)ti->private;
-    compress_free_buffers(dc);
+    if (dc->zcache) {
+        for (n = 0; n < dc->nr_zc; ++n) {
+            zone_cache_dtr(dc, &dc->zcache[n]);
+        }
+        kfree(dc->zcache);
+    }
     if (dc->io_queue) {
         destroy_workqueue(dc->io_queue);
     }
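
For reference, a minimal sketch of the caller pattern the new zone cache API
expects, distilled from the compress_read()/compress_write() changes above.
example_copy_lblk() is a hypothetical helper for illustration, not a function
in this commit, and its error handling is abbreviated:

/* Sketch only: mirrors the zone_cache_get()/zone_cache_put() pattern above. */
static int
example_copy_lblk(struct dm_compress* dc, u64 lblk, void* out, u32 len)
{
    u32 zone = lblk / dc->params.lblk_per_zone;   /* one cache entry per zone */
    struct zone_cache* zc;
    int ret;

    zc = zone_cache_get(dc, zone);      /* returns with zc->lock held */
    if (!zc) {
        return -EBUSY;                  /* all nr_zc entries locked by others */
    }
    ret = lblk_read(dc, zc, lblk);      /* reads (and decompresses) into zc->lblk */
    if (ret == 0) {
        memcpy(out, zc->lblk, len);
    }
    zone_cache_put(dc, zc);             /* flushes dirty state, drops zc->lock */

    return ret;
}

zone_cache_get() scans the pool under dc->zc_lock in three passes: an entry
already caching the zone, then an unused entry (zone == ZONE_NONE), then any
idle entry it can trylock and reset. It returns NULL only when every entry is
held by another caller.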