/* cbd/dm-compress/lbatview.c */

/*
* Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>
/*
 * A cached view over one or two adjacent pblks of a zone's logical
 * block allocation table (LBAT), plus the pbat currently used for
 * physical block allocation on behalf of this view.
 */
struct lbatview {
struct list_head list;          /* LRU linkage within lbatviewcache */
u64 pblk;                       /* first lbat pblk covered; PBLK_NONE if unused */
struct mutex reflock;           /* protects ref */
unsigned int ref;               /* number of outstanding lbatviewcache_get()s */
struct mutex lock;              /* protects pages[], pbat, and element access */
struct compress_params* kparams;
struct compress_stats* kstats;
struct pbatcache* pc;           /* shared pbat cache (allocation bitmaps) */
struct lbatpblkcache* lpc;      /* shared cache of raw lbat pblks */
struct pbat* pbat;              /* cached allocator; NULL if none held */
struct lbatpblk* pages[2];      /* the one or two lbat pblks this view maps */
};
/*
 * One-time construction of a cache slot: zero the object and put every
 * field into its "empty" state. Always succeeds.
 */
static bool
lbatview_ctr(struct lbatview* lv,
             struct compress_params* kparams,
             struct compress_stats* kstats,
             struct pbatcache* pc,
             struct lbatpblkcache* lpc)
{
	memset(lv, 0, sizeof(*lv));
	INIT_LIST_HEAD(&lv->list);
	mutex_init(&lv->reflock);
	mutex_init(&lv->lock);
	lv->pblk = PBLK_NONE;
	lv->ref = 0;
	lv->kparams = kparams;
	lv->kstats = kstats;
	lv->pc = pc;
	lv->lpc = lpc;
	lv->pbat = NULL;
	lv->pages[0] = NULL;
	lv->pages[1] = NULL;
	return true;
}
/*
 * Destructor: drop any page and pbat references still held and detach
 * from the shared caches. Put pages in reverse order (1 then 0), as
 * acquisition order is 0 then 1.
 */
static void
lbatview_dtr(struct lbatview* lv)
{
	int n;

	for (n = 1; n >= 0; --n) {
		lbatpblkcache_put(lv->lpc, lv->pages[n]);
		lv->pages[n] = NULL;
	}
	pbatcache_put(lv->pc, lv->pbat);
	lv->pbat = NULL;
	lv->lpc = NULL;
	lv->pc = NULL;
}
/*
 * Release all cached references (lbat pages and pbat) back to their
 * caches. Returns 0 on success, or the last put error encountered;
 * all puts are attempted regardless of earlier failures.
 */
static int
lbatview_flush(struct lbatview* lv)
{
	int ret = 0;
	int err;
	int n;

	mutex_lock(&lv->lock);
	/* Drop pages in reverse acquisition order: [1] then [0]. */
	for (n = 1; n >= 0; --n) {
		if (!lv->pages[n])
			continue;
		err = lbatpblkcache_put(lv->lpc, lv->pages[n]);
		lv->pages[n] = NULL;
		if (err)
			ret = err;
	}
	err = pbatcache_put(lv->pc, lv->pbat);
	lv->pbat = NULL;
	if (err)
		ret = err;
	mutex_unlock(&lv->lock);
	return ret;
}
/*
 * Re-point the view at a new window of the lbat: pblk is the first lbat
 * pblk, count (0..2) is how many consecutive pblks the view covers.
 * The view must already be flushed; leaked references are reported.
 * Returns 0 on success or -EIO if a page cannot be fetched, in which
 * case all partial state is rolled back and pblk is reset to PBLK_NONE.
 */
static int
lbatview_reset(struct lbatview* lv, u64 pblk, u32 count)
{
	int ret = 0;
	u32 n;

	if (lv->pbat)
		printk(KERN_ERR "%s: pbat leak\n", __func__);
	if (lv->pages[0])
		printk(KERN_ERR "%s: lbatpblk leak\n", __func__);
	if (lv->pages[1])
		printk(KERN_ERR "%s: lbatpblk leak\n", __func__);
	lv->pblk = pblk;
	for (n = 0; n < count && n < 2 && !ret; ++n) {
		lv->pages[n] = lbatpblkcache_get(lv->lpc, pblk + n);
		if (!lv->pages[n])
			ret = -EIO;
	}
	if (ret) {
		lbatpblkcache_put(lv->lpc, lv->pages[1]);
		lv->pages[1] = NULL;
		lbatpblkcache_put(lv->lpc, lv->pages[0]);
		lv->pages[0] = NULL;
		lv->pblk = PBLK_NONE;
	}
	return ret;
}
static u64
lbatview_alloc_pblk(struct lbatview* lv)
{
int ret = 0;
u32 zone = zone_for_pblk(&lv->kparams->params, lv->pblk);
u64 pblk;
u32 zone_off;
struct pbat* pbat;
if (lv->pbat) {
pblk = pbat_alloc(lv->pbat);
if (pblk != PBLK_NONE) {
return pblk;
}
ret = pbatcache_put(lv->pc, lv->pbat);
lv->pbat = NULL;
if (ret) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
return PBLK_NONE;
}
}
pbat = pbatcache_get(lv->pc, zone, true);
if (pbat) {
pblk = pbat_alloc(pbat);
if (pblk != PBLK_NONE) {
lv->pbat = pbat;
return pblk;
}
ret = pbatcache_put(lv->pc, pbat);
if (ret) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
return PBLK_NONE;
}
}
for (zone_off = 1;
zone_off <= zone || zone + zone_off < lv->kparams->params.nr_zones;
++zone_off) {
if (zone_off <= zone) {
pbat = pbatcache_get(lv->pc, zone - zone_off, true);
if (pbat) {
pblk = pbat_alloc(pbat);
if (pblk != PBLK_NONE) {
lv->pbat = pbat;
return pblk;
}
ret = pbatcache_put(lv->pc, pbat);
if (ret) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
return PBLK_NONE;
}
}
}
if (zone + zone_off < lv->kparams->params.nr_zones) {
pbat = pbatcache_get(lv->pc, zone + zone_off, true);
if (pbat) {
pblk = pbat_alloc(pbat);
if (pblk != PBLK_NONE) {
lv->pbat = pbat;
return pblk;
}
ret = pbatcache_put(lv->pc, pbat);
if (ret) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
return PBLK_NONE;
}
}
}
}
printk(KERN_ERR "%s: fail, all zones full\n", __func__);
return PBLK_NONE;
}
/*
 * Return a previously allocated data pblk to the pbat of the zone that
 * owns it. If the freed pblk belongs to this view's home zone while the
 * currently cached pbat is for a different zone, the home-zone pbat is
 * kept cached (replacing the old one) to favor future home-zone
 * allocations. Returns 0 on success or a negative errno.
 */
static int
lbatview_free_pblk(struct lbatview* lv, u64 pblk)
{
int ret = 0;
/* Home zone of this view (derived from the view's lbat pblk). */
u32 zone = zone_for_pblk(&lv->kparams->params, lv->pblk);
u32 pblk_zone;
struct pbat* pbat;
pblk_zone = zone_for_pblk(&lv->kparams->params, pblk);
if (pblk_zone == ZONE_NONE || pblk_zone >= lv->kparams->params.nr_zones) {
printk(KERN_ERR "%s: pblk=%lu: zone out of bounds\n", __func__, (unsigned long)pblk);
return -EINVAL;
}
pbat = pbatcache_get(lv->pc, pblk_zone, false);
if (!pbat) {
printk(KERN_ERR "%s: pbatcache_get failed\n", __func__);
return -EINVAL;
}
ret = pbat_free(pbat, pblk);
BUG_ON(ret != 0);
/* Prefer caching the home zone's pbat: swap it in if the freed pblk
 * is in the home zone and the cached pbat belongs to another zone. */
if (lv->pbat && pbat_zone(lv->pbat) != zone && pblk_zone == zone) {
ret = pbatcache_put(lv->pc, lv->pbat);
if (ret) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
}
/* Ownership of the just-fetched pbat reference moves to lv. */
lv->pbat = pbat;
}
else {
ret = pbatcache_put(lv->pc, pbat);
if (ret) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
}
}
return ret;
}
/*
 * Byte offset of lblk's LBAT element within this view's page window.
 * Computed as (element offset within the zone's full lbat) minus
 * (byte offset of the first pblk this view covers).
 */
static u32
lbatview_elem_off(struct lbatview* lv, u64 lblk)
{
	struct cbd_params* params = &lv->kparams->params;
	u32 lv_zone = zone_for_pblk(params, lv->pblk);
	u32 rel_lblk;
	u32 abs_off;
	u32 base_off;

	/* lblk relative to the first lblk of the view's zone. */
	rel_lblk = lblk - lv_zone * params->lblk_per_zone;
	/* Element offset in the zone's full lbat. */
	abs_off = rel_lblk * lba_len(params);
	/* Offset of the first pblk covered by this view. */
	base_off = pblk_size(params) * (lv->pblk - lbat_off(params, lv_zone));
	return abs_off - base_off;
}
/*
 * Copy len bytes at view-relative offset off into buf. The range may
 * straddle the boundary between the view's two pages, in which case
 * the copy is split across both.
 */
static void
lbatview_rmem(struct lbatview* lv, u32 off, u32 len, void* buf)
{
	u32 psize = pblk_size(&lv->kparams->params);
	u32 idx;
	u8* pg;

	BUG_ON(off + len > 2 * psize);
	if (off < psize && off + len > psize) {
		/* Straddles the page boundary: split into two copies. */
		u32 head = psize - off;
		u8* pg0 = lbatpblk_get_buf(lv->pages[0], false);
		u8* pg1 = lbatpblk_get_buf(lv->pages[1], false);
		memcpy(buf, pg0 + off, head);
		memcpy((u8*)buf + head, pg1, len - head);
		lbatpblk_put_buf(lv->pages[1]);
		lbatpblk_put_buf(lv->pages[0]);
		return;
	}
	/* Entirely within one page. */
	idx = off / psize;
	pg = lbatpblk_get_buf(lv->pages[idx], false);
	memcpy(buf, pg + off % psize, len);
	lbatpblk_put_buf(lv->pages[idx]);
}
/*
 * Copy len bytes from buf into the view at view-relative offset off,
 * marking the touched page(s) dirty. The range may straddle the
 * boundary between the view's two pages.
 */
static void
lbatview_wmem(struct lbatview* lv, u32 off, u32 len, void* buf)
{
	u32 psize = pblk_size(&lv->kparams->params);
	u32 idx;
	u8* pg;

	BUG_ON(off + len > 2 * psize);
	if (off < psize && off + len > psize) {
		/* Straddles the page boundary: split into two copies. */
		u32 head = psize - off;
		u8* pg0 = lbatpblk_get_buf(lv->pages[0], true);
		u8* pg1 = lbatpblk_get_buf(lv->pages[1], true);
		memcpy(pg0 + off, buf, head);
		memcpy(pg1, (u8*)buf + head, len - head);
		lbatpblk_put_buf(lv->pages[1]);
		lbatpblk_put_buf(lv->pages[0]);
		return;
	}
	/* Entirely within one page. */
	idx = off / psize;
	pg = lbatpblk_get_buf(lv->pages[idx], true);
	memcpy(pg + off % psize, buf, len);
	lbatpblk_put_buf(lv->pages[idx]);
}
/*
 * Resize the physical allocation backing logical block lblk so it can
 * hold len bytes (CBD_UNCOMPRESSED means a full uncompressed lblk).
 * Grows by allocating pblks and appending them to the element's pblk
 * list, or shrinks by freeing trailing pblks. On allocation failure the
 * element is rolled back to its original allocation and -ENOSPC is
 * returned; the stored length is only updated on full success.
 * Also maintains the lblk_used statistic.
 */
int
lbatview_elem_realloc(struct lbatview* lv, u64 lblk, u32 len)
{
int ret = 0;
int err;
u32 elem_off;
u32 elem_len;
u32 req_nalloc;
u32 cur_nalloc;
u32 old_nalloc;
u32 off;
u64 pblk;
u32 elem_lelen;
u64 elem_lepblk;
mutex_lock(&lv->lock);
elem_off = lbatview_elem_off(lv, lblk);
/* pblks needed for the requested length. */
req_nalloc = (len == CBD_UNCOMPRESSED) ?
lblk_per_pblk(&lv->kparams->params) :
DIV_ROUND_UP(len, pblk_size(&lv->kparams->params));
/* Read the current on-disk length. elem_lelen is pre-zeroed because
 * only lba_elem_len_bytes() bytes of it are filled in (little-endian
 * on-disk layout). */
elem_lelen = 0;
lbatview_rmem(lv, elem_off, lba_elem_len_bytes(&lv->kparams->params), &elem_lelen);
elem_len = __le32_to_cpu(elem_lelen);
/* pblks currently allocated to the element. */
cur_nalloc = (elem_len == CBD_UNCOMPRESSED) ?
lblk_per_pblk(&lv->kparams->params) :
DIV_ROUND_UP(elem_len, pblk_size(&lv->kparams->params));
old_nalloc = cur_nalloc;
/* Grow: append newly allocated pblks to the element's pblk list. */
while (cur_nalloc < req_nalloc) {
off = elem_off + lba_elem_len_bytes(&lv->kparams->params) +
cur_nalloc * lba_elem_pblk_bytes(&lv->kparams->params);
pblk = lbatview_alloc_pblk(lv);
if (pblk == PBLK_NONE) {
printk(KERN_ERR "%s: lbatview_alloc_pblk failed\n", __func__);
ret = -ENOSPC;
/* Roll back: free everything allocated above old_nalloc. */
req_nalloc = old_nalloc;
goto do_free;
}
elem_lepblk = __cpu_to_le64(pblk);
lbatview_wmem(lv, off, lba_elem_pblk_bytes(&lv->kparams->params), &elem_lepblk);
++cur_nalloc;
}
do_free:
/* Shrink (or roll back): free trailing pblks down to req_nalloc. */
while (cur_nalloc > req_nalloc) {
--cur_nalloc;
off = elem_off + lba_elem_len_bytes(&lv->kparams->params) +
cur_nalloc * lba_elem_pblk_bytes(&lv->kparams->params);
elem_lepblk = 0;
lbatview_rmem(lv, off, lba_elem_pblk_bytes(&lv->kparams->params), &elem_lepblk);
pblk = __le64_to_cpu(elem_lepblk);
err = lbatview_free_pblk(lv, pblk);
if (err) {
printk(KERN_ERR "%s: lbatview_free_pblk failed\n", __func__);
ret = err;
}
}
/* Commit the new length only if everything succeeded. */
if (!ret) {
elem_lelen = __cpu_to_le32(len);
lbatview_wmem(lv, elem_off, lba_elem_len_bytes(&lv->kparams->params), &elem_lelen);
}
/* Track lblk usage transitions: unused -> used and used -> unused. */
mutex_lock(&lv->kstats->lock);
if (old_nalloc == 0) {
if (cur_nalloc != 0) {
++lv->kstats->stats.lblk_used;
}
}
else {
if (cur_nalloc == 0) {
--lv->kstats->stats.lblk_used;
}
}
mutex_unlock(&lv->kstats->lock);
mutex_unlock(&lv->lock);
return ret;
}
/*
 * Read the stored (compressed) length of logical block lblk from its
 * LBAT element. Returns the length in CPU byte order.
 */
u32
lbatview_elem_len(struct lbatview* lv, u64 lblk)
{
	u32 lelen = 0;	/* pre-zeroed: only lba_elem_len_bytes() bytes are read */
	u32 elem_off;

	mutex_lock(&lv->lock);
	elem_off = lbatview_elem_off(lv, lblk);
	lbatview_rmem(lv, elem_off, lba_elem_len_bytes(&lv->kparams->params), &lelen);
	mutex_unlock(&lv->lock);
	return __le32_to_cpu(lelen);
}
/*
 * Fetch the idx'th pblk stored in lblk's LBAT element. The value is
 * validated: its zone must be in range and it must not point into the
 * zone's metadata area. Returns the pblk, or PBLK_NONE if validation
 * fails (with a diagnostic).
 */
u64
lbatview_elem_pblk(struct lbatview* lv, u64 lblk, u32 idx)
{
	u32 off;
	u64 elem_lepblk;
	u64 pblk;
	u32 pblk_zone;

	mutex_lock(&lv->lock);
	off = lbatview_elem_off(lv, lblk) +
	      lba_elem_len_bytes(&lv->kparams->params) +
	      idx * lba_elem_pblk_bytes(&lv->kparams->params);
	/* Pre-zeroed: only lba_elem_pblk_bytes() bytes are filled in. */
	elem_lepblk = 0;
	lbatview_rmem(lv, off, lba_elem_pblk_bytes(&lv->kparams->params), &elem_lepblk);
	mutex_unlock(&lv->lock);
	pblk = __le64_to_cpu(elem_lepblk);
	pblk_zone = zone_for_pblk(&lv->kparams->params, pblk);
	if (pblk_zone == ZONE_NONE || pblk_zone >= lv->kparams->params.nr_zones) {
		printk(KERN_ERR "%s: pblk %lu out of range at lblk=%lu n=%u\n",
		       __func__, (unsigned long)pblk, (unsigned long)lblk, idx);
		return PBLK_NONE;
	}
	if (pblk < zone_data_off(&lv->kparams->params, pblk_zone)) {
		/* Fix: the old message printed pblk under the "lblk=" label
		 * and omitted the actual lblk, producing a misleading
		 * diagnostic. Print both, matching the message above. */
		printk(KERN_ERR "%s: pblk %lu in metadata at lblk=%lu n=%u\n",
		       __func__, (unsigned long)pblk, (unsigned long)lblk, idx);
		return PBLK_NONE;
	}
	return pblk;
}
/*
 * An LRU cache of lbatview objects, backed by shared pbat and lbatpblk
 * caches. Entries live in a fixed array (cache) and are ordered for
 * reuse via cache_head (most recently used at the head).
 */
struct lbatviewcache {
struct cbd_params* params;      /* device geometry parameters */
struct pbatcache* pc;           /* shared pbat (allocation bitmap) cache */
struct lbatpblkcache* lpc;      /* shared cache of raw lbat pblks */
struct mutex cache_lock;        /* protects cache_head ordering */
struct list_head cache_head;    /* LRU list of all entries */
unsigned int cache_len;         /* number of entries in cache */
struct lbatview* cache;         /* backing array of entries */
};
/* Bytes a caller must allocate to hold a struct lbatviewcache. */
size_t
lbatviewcache_size(void)
{
return sizeof(struct lbatviewcache);
}
/*
 * Construct the lbatview cache: build the underlying pbat and lbatpblk
 * caches, then size and initialize the lbatview entry array from the
 * page budget. Returns false on any allocation/construction failure.
 *
 * NOTE(review): on failure this returns with partially constructed
 * state (e.g. lvc->pc allocated but lvc->lpc not) — presumably the
 * caller is expected to invoke lbatviewcache_dtr(), which tolerates
 * the partial state; TODO confirm against the caller.
 */
bool
lbatviewcache_ctr(struct lbatviewcache* lvc,
struct compress_params* kparams, struct compress_stats* kstats,
u32 cache_pages)
{
struct lbatview* cache;
u32 cache_len;
u32 n;
memset(lvc, 0, sizeof(struct lbatviewcache));
lvc->params = &kparams->params;
lvc->pc = kmalloc(pbatcache_size(), GFP_KERNEL);
if (!lvc->pc) {
return false;
}
if (!pbatcache_ctr(lvc->pc, kparams, kstats, cache_pages)) {
return false;
}
lvc->lpc = kmalloc(lbatpblkcache_size(), GFP_KERNEL);
if (!lvc->lpc) {
return false;
}
if (!lbatpblkcache_ctr(lvc->lpc, kparams, kstats, cache_pages)) {
return false;
}
/* lbatviewcache gets one entry per lbatpblk (XXX: 5/6?) */
cache_len = (cache_pages * 15 / 32);
if (!cache_len) {
printk(KERN_ERR "%s: Cache too small\n", __func__);
return false;
}
printk(KERN_INFO "%s: cache_len=%u\n", __func__, cache_len);
cache = kzalloc(cache_len * sizeof(struct lbatview), GFP_KERNEL);
if (!cache) {
return false;
}
mutex_init(&lvc->cache_lock);
INIT_LIST_HEAD(&lvc->cache_head);
/* cache_len is set before the ctr loop so that a later dtr can walk
 * every constructed entry even if construction fails midway. */
lvc->cache_len = cache_len;
lvc->cache = cache;
for (n = 0; n < cache_len; ++n) {
if (!lbatview_ctr(&cache[n], kparams, kstats, lvc->pc, lvc->lpc)) {
return false;
}
list_add_tail(&cache[n].list, &lvc->cache_head);
}
return true;
}
/*
 * Tear down the lbatview cache: destruct every entry (reporting any
 * outstanding references), free the entry array, and destroy the
 * underlying lbatpblk and pbat caches. Safe on a partially
 * constructed cache: lpc/pc are NULL-checked, and cache_len is 0
 * until the entry array exists.
 */
void
lbatviewcache_dtr(struct lbatviewcache* lvc)
{
	unsigned int n;
	struct lbatview* lv;

	for (n = 0; n < lvc->cache_len; ++n) {
		/* Fix: dropped the old `if (!lv) continue;` — the address of
		 * an array element is never NULL, so it was dead code. Also
		 * report the ref leak before destructing the entry. */
		lv = &lvc->cache[n];
		if (lv->ref) {
			printk(KERN_ERR "%s: lbatview ref leak: n=%u ref=%u\n",
			       __func__, n, lv->ref);
		}
		lbatview_dtr(lv);
	}
	kfree(lvc->cache);
	lvc->cache = NULL;
	lvc->cache_len = 0;
	if (lvc->lpc) {
		lbatpblkcache_dtr(lvc->lpc);
		kfree(lvc->lpc);
		lvc->lpc = NULL;
	}
	if (lvc->pc) {
		pbatcache_dtr(lvc->pc);
		kfree(lvc->pc);
		lvc->pc = NULL;
	}
	lvc->params = NULL;
}
/*
 * Get a referenced lbatview covering logical block lblk. First looks
 * for an existing entry mapping the same pblk window; failing that,
 * reuses an unmapped or unreferenced entry (searching from the LRU
 * tail) and resets it onto the new window. Returns NULL if no entry
 * is free or the reset fails. The returned reference must be released
 * with lbatviewcache_put().
 *
 * Locking: cache_lock orders the LRU scan; each entry's reflock is
 * taken during inspection. On the "found" paths cache_lock is dropped
 * while reflock is still held across lbatview_reset().
 */
struct lbatview*
lbatviewcache_get(struct lbatviewcache* lvc, u64 lblk)
{
u32 zone;
u64 zone_lbat_pblk;
u32 rel_lblk;
u32 lbat_offset;
u32 rel_pblk;
u64 pblk;
u32 count;
struct lbatview* lv;
/* Locate the lbat pblk window holding lblk's element. */
zone = lblk / lvc->params->lblk_per_zone;
zone_lbat_pblk = lbat_off(lvc->params, zone);
rel_lblk = lblk - lvc->params->lblk_per_zone * zone;
lbat_offset = rel_lblk * lba_len(lvc->params);
rel_pblk = lbat_offset / pblk_size(lvc->params);
pblk = zone_lbat_pblk + rel_pblk;
/* The last lbat pblk of a zone gets a 1-page view; otherwise 2 pages
 * so elements straddling a page boundary can be accessed. */
count = (rel_pblk == lbat_len(lvc->params) - 1) ? 1 : 2;
mutex_lock(&lvc->cache_lock);
list_for_each_entry(lv, &lvc->cache_head, list) {
mutex_lock(&lv->reflock);
if (lv->pblk == pblk) {
list_move(&lv->list, &lvc->cache_head);
mutex_unlock(&lvc->cache_lock);
/* At ref==0 the entry was flushed on its last put, so its
 * pages must be re-acquired via reset below. */
if (lv->ref == 0) {
goto found;
}
++lv->ref;
mutex_unlock(&lv->reflock);
return lv;
}
if (lv->pblk == PBLK_NONE) {
/* Unmapped entry: claim it for this window. */
list_move(&lv->list, &lvc->cache_head);
mutex_unlock(&lvc->cache_lock);
goto found;
}
mutex_unlock(&lv->reflock);
}
/* No match or free entry: evict the least recently used entry with
 * no outstanding references. */
list_for_each_entry_reverse(lv, &lvc->cache_head, list) {
mutex_lock(&lv->reflock);
if (lv->ref == 0) {
list_move(&lv->list, &lvc->cache_head);
mutex_unlock(&lvc->cache_lock);
goto found;
}
mutex_unlock(&lv->reflock);
}
printk(KERN_ERR "%s: failed to find free entry\n", __func__);
mutex_unlock(&lvc->cache_lock);
return NULL;
found:
/* reflock is still held here; cache_lock has been released. */
if (lbatview_reset(lv, pblk, count) != 0) {
mutex_unlock(&lv->reflock);
return NULL;
}
lv->ref = 1;
mutex_unlock(&lv->reflock);
return lv;
}
/*
 * Drop a reference obtained from lbatviewcache_get(). When the last
 * reference goes away the view is flushed (its pages and pbat are
 * released back to their caches). NULL is tolerated and is a no-op.
 * Returns 0, or the flush error on final release.
 */
int
lbatviewcache_put(struct lbatviewcache* lvc, struct lbatview* lv)
{
	int ret = 0;

	if (!lv)
		return 0;
	mutex_lock(&lv->reflock);
	--lv->ref;
	if (lv->ref == 0) {
		ret = lbatview_flush(lv);
		if (ret)
			printk(KERN_ERR "%s: lbatview_flush failed\n", __func__);
	}
	mutex_unlock(&lv->reflock);
	return ret;
}