cbd/dm-compress/lbd.c

/*
* Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>
struct lbd {
u64 lblk;
struct mutex reflock;
unsigned int ref;
struct mutex lock;
enum cache_state state;
struct cbd_params* params;
struct lbatviewcache* lvc;
struct lbatview* lv;
u8* lz4_wrkmem;
struct page* lz4_cpages;
u8* lz4_cbuf;
struct page* pages;
u8* buf;
};
/*
* Allocating lz4_wrkmem percpu:
*
* If the alloc is per-instance, it would need to be allocated in compress.c
* and passed around. The easiest way to pass it around is likely to make it
* part of a struct. We can't use struct compress because that is private.
* So we would need to create a struct (say, compress_percpu).
*
* If the alloc is global, we can just declare it file-local. But it would
* need to be the largest possible size. Which means we probably don't want
* to use alloc_percpu_gfp() directly, because 1 MiB chunks are probably not
* that common. So suppose we allocate a percpu vector of page ptrs.
*
* #define COMPRESS_MAX_INPUT_SIZE (1 << LBLK_SHIFT_MAX)
* #define COMPRESS_LZ4_BOUND LZ4_COMPRESSBOUND(COMPRESS_MAX_INPUT_SIZE)
* #define WRKMEM_PAGES DIV_ROUND_UP(COMPRESS_LZ4_BOUND, PAGE_SIZE)
* typedef struct page* lz4_wrkmem_pagev_t[WRKMEM_PAGES];
*
* g_lz4_wrkmem = alloc_percpu_gfp(lz4_wrkmem_pagev_t, GFP_KERNEL);
*
* That's not bad at all. But how do we alloc (and free) the actual pages?
*
* pagev = *get_cpu_ptr(g_lz4_wrkmem);
* ... use pagev ...
* put_cpu_ptr(g_lz4_wrkmem);
*
* free_percpu(g_lz4_wrkmem);
*/
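/*
* To answer the last question above, here is one possible shape for the
* page alloc/free half. This is only a sketch: the helper names
* lz4_wrkmem_init()/lz4_wrkmem_exit() and the variable g_lz4_wrkmem are
* illustrative, not existing code, and it assumes the pages are set up
* once for all possible CPUs at module init:
*
* static lz4_wrkmem_pagev_t __percpu* g_lz4_wrkmem;
*
* static void lz4_wrkmem_exit(void)
* {
*     int cpu;
*     unsigned int n;
*     struct page** pagev;
*
*     for_each_possible_cpu(cpu) {
*         pagev = *per_cpu_ptr(g_lz4_wrkmem, cpu);
*         for (n = 0; n < WRKMEM_PAGES; ++n) {
*             if (pagev[n]) {
*                 __free_page(pagev[n]);
*             }
*         }
*     }
*     free_percpu(g_lz4_wrkmem);
* }
*
* static int lz4_wrkmem_init(void)
* {
*     int cpu;
*     unsigned int n;
*     struct page** pagev;
*
*     g_lz4_wrkmem = alloc_percpu_gfp(lz4_wrkmem_pagev_t, GFP_KERNEL);
*     if (!g_lz4_wrkmem) {
*         return -ENOMEM;
*     }
*     for_each_possible_cpu(cpu) {
*         pagev = *per_cpu_ptr(g_lz4_wrkmem, cpu);
*         for (n = 0; n < WRKMEM_PAGES; ++n) {
*             pagev[n] = alloc_page(GFP_KERNEL);
*             if (!pagev[n]) {
*                 lz4_wrkmem_exit();
*                 return -ENOMEM;
*             }
*         }
*     }
*     return 0;
* }
*
* Percpu memory from alloc_percpu_gfp() starts zeroed, so
* lz4_wrkmem_exit() can safely unwind a partially filled vector.
*/
/*
* Test whether the logical block data is all zeros. Only effective when
* CBD_DETECT_ZERO_BLOCKS is defined; otherwise this always returns
* false and zero blocks are stored like any other data.
*/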
static inline bool
lblk_is_zeros(struct cbd_params* params, struct lbd* lbd)
{
#ifdef CBD_DETECT_ZERO_BLOCKS
u32 off;
u32 len = PBLK_SIZE * lblk_per_pblk(params);
for (off = 0; off < len; ++off) {
if (lbd->buf[off]) {
return false;
}
}
return true;
#else
return false;
#endif
}
/*
* Compress lbd->buf into lbd->lz4_cbuf
*
* Returns number of bytes in cbuf or 0 for failure.
*/
static size_t
lblk_compress(struct lbd* lbd)
{
int ret;
void *dbuf = lbd->buf;
u32 dlen = PBLK_SIZE * lblk_per_pblk(lbd->params);
void *cbuf = lbd->lz4_cbuf;
u32 clen = PBLK_SIZE * lblk_per_pblk(lbd->params);
ret = LZ4_compress_default(dbuf, cbuf, dlen, clen, lbd->lz4_wrkmem);
if (ret <= 0) {
return 0;
}
return (size_t)ret;
}
/*
* Decompress lbd->lz4_cbuf of size clen into lbd->buf
*
* Returns 0 for success, <0 for failure.
*/
static int
lblk_decompress(struct lbd* lbd, u32 clen)
{
int ret;
void *cbuf = lbd->lz4_cbuf;
void *dbuf = lbd->buf;
u32 dlen = PBLK_SIZE * lblk_per_pblk(lbd->params);
ret = LZ4_decompress_safe(cbuf, dbuf, clen, dlen);
if (ret != dlen) {
printk(KERN_ERR "%s: failed, ret=%d (expected %u)\n", __func__, ret, (unsigned int)dlen);
return -1;
}
return 0;
}
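/*
* Initialize an lbd object: store references to the shared params and
* lbatview cache, and allocate the LZ4 scratch memory plus one buffer
* each for compressed and uncompressed data.
*
* Returns false if any allocation fails.
*/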
bool
lbd_ctr(struct lbd* lbd,
struct cbd_params* params,
struct lbatviewcache* lvc)
{
memset(lbd, 0, sizeof(struct lbd));
lbd->lblk = LBLK_NONE;
mutex_init(&lbd->reflock);
lbd->ref = 0;
mutex_init(&lbd->lock);
lbd->state = CACHE_STATE_UNCACHED;
lbd->params = params;
lbd->lvc = lvc;
lbd->lv = NULL;
/* LZ4_compress_default() requires LZ4_MEM_COMPRESS bytes of wrkmem */
lbd->lz4_wrkmem = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
if (!lbd->lz4_wrkmem) {
return false;
}
lbd->lz4_cpages = cbd_alloc_pages(lblk_per_pblk(lbd->params));
if (!lbd->lz4_cpages) {
return false;
}
lbd->lz4_cbuf = page_address(lbd->lz4_cpages);
lbd->pages = cbd_alloc_pages(lblk_per_pblk(lbd->params));
if (!lbd->pages) {
return false;
}
lbd->buf = page_address(lbd->pages);
return true;
}
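/*
* Tear down an lbd object: drop the lbatview reference (if any) and
* free the buffers allocated by lbd_ctr().
*/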
void
lbd_dtr(struct lbd* lbd)
{
if (lbatviewcache_put(lbd->lvc, lbd->lv) != 0) {
printk(KERN_ERR "%s: lbatviewcache_put failed\n", __func__);
}
lbd->lv = NULL;
cbd_free_pages(lbd->pages, lblk_per_pblk(lbd->params));
lbd->pages = NULL;
lbd->buf = NULL;
cbd_free_pages(lbd->lz4_cpages, lblk_per_pblk(lbd->params));
lbd->lz4_cpages = NULL;
lbd->lz4_cbuf = NULL;
kfree(lbd->lz4_wrkmem);
lbd->lz4_wrkmem = NULL;
}
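/*
* Write completion callback: free the bounce page attached to the bio
* and flag the lbd as errored if the write failed.
*/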
static void
lbd_flush_endio(struct bio* bio)
{
struct lbd* lbd = bio->bi_private;
int ret;
cbd_free_page(bio->bi_io_vec[0].bv_page);
ret = pblk_endio(bio);
if (ret) {
printk(KERN_ERR "%s: I/O failed\n", __func__);
lbd->state = CACHE_STATE_ERROR;
}
}
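/*
* Write back a dirty lbd. The data is compressed (or, optionally,
* recognized as all zeros), the on-disk element is resized through the
* lbatview, and each physical block is written asynchronously via a
* bounce page. If compression does not reduce the size, the data is
* stored uncompressed. The lbatview reference is dropped and the state
* set to CLEAN; a failed write flips it to ERROR from the endio
* callback.
*
* Returns 0 on success or a negative errno.
*/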
int
lbd_flush(struct lbd* lbd)
{
int ret = 0;
u32 c_len;
u32 elem_len;
u8* p;
u32 n;
u64 pblk;
struct page* iopagev[1];
mutex_lock(&lbd->lock);
if (lbd->state != CACHE_STATE_DIRTY) {
if (lbd->state == CACHE_STATE_ERROR) {
ret = -EIO;
goto out;
}
goto clean;
}
if (lblk_is_zeros(lbd->params, lbd)) {
c_len = 0;
elem_len = 0;
p = NULL;
}
else {
c_len = lblk_compress(lbd);
if (c_len > 0) {
size_t c_blkrem = c_len % PBLK_SIZE;
if (c_blkrem) {
/* Zero-pad the compressed data out to the pblk boundary */
memset(lbd->lz4_cbuf + c_len, 0, PBLK_SIZE - c_blkrem);
}
elem_len = c_len;
p = lbd->lz4_cbuf;
}
else {
c_len = PBLK_SIZE * lblk_per_pblk(lbd->params);
elem_len = CBD_UNCOMPRESSED;
p = lbd->buf;
}
}
ret = lbatview_elem_realloc(lbd->lv, lbd->lblk, elem_len);
if (ret) {
goto out;
}
for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
pblk = lbatview_elem_pblk(lbd->lv, lbd->lblk, n);
if (pblk == PBLK_NONE) {
ret = -EIO;
goto out;
}
iopagev[0] = cbd_alloc_page();
if (!iopagev[0]) {
printk(KERN_ERR "%s: out of memory\n", __func__);
ret = -ENOMEM;
goto out;
}
memcpy(page_address(iopagev[0]), p, PBLK_SIZE);
pblk_write(lbd->params, pblk, 1, iopagev, lbd_flush_endio, lbd);
}
clean:
ret = lbatviewcache_put(lbd->lvc, lbd->lv);
lbd->lv = NULL;
if (ret) {
lbd->state = CACHE_STATE_ERROR;
goto out;
}
lbd->state = CACHE_STATE_CLEAN;
out:
mutex_unlock(&lbd->lock);
return ret;
}
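/*
* Populate an uncached lbd from disk: look up the element length, read
* the backing physical blocks, and decompress (or copy) the result
* into lbd->buf. A zero-length element yields an all-zeros logical
* block.
*
* Returns 0 on success or a negative errno.
*/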
int
lbd_read(struct lbd* lbd)
{
int ret = 0;
u32 c_len;
u64 pblk;
struct page* iopagev[1];
mutex_lock(&lbd->lock);
if (lbd->state != CACHE_STATE_UNCACHED) {
goto out;
}
ret = lbatview_read(lbd->lv);
if (ret) {
goto out;
}
c_len = lbatview_elem_len(lbd->lv, lbd->lblk);
if (c_len == 0) {
memset(lbd->buf, 0, PBLK_SIZE * lblk_per_pblk(lbd->params));
}
else {
bool is_compressed = true;
u32 d_len = PBLK_SIZE * lblk_per_pblk(lbd->params);
u32 n;
u8* p;
if (c_len == CBD_UNCOMPRESSED) {
is_compressed = false;
c_len = d_len;
}
p = lbd->lz4_cbuf;
for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
pblk = lbatview_elem_pblk(lbd->lv, lbd->lblk, n);
if (pblk == PBLK_NONE) {
ret = -EIO;
goto out;
}
/* XXX: check pblk not in metadata? */
iopagev[0] = virt_to_page(p);
ret = pblk_read_wait(lbd->params, pblk, 1, iopagev);
if (ret) {
goto out;
}
}
if (is_compressed) {
if (lblk_decompress(lbd, c_len) != 0) {
printk(KERN_ERR " decompress failed\n");
ret = -EIO;
goto out;
}
}
else {
memcpy(lbd->buf, lbd->lz4_cbuf, d_len);
}
}
lbd->state = CACHE_STATE_CLEAN;
out:
mutex_unlock(&lbd->lock);
return ret;
}
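/*
* Retarget the lbd at a new logical block: acquire the matching
* lbatview and mark the data uncached. Called with reflock held.
*/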
bool
lbd_reset(struct lbd* lbd, u64 lblk)
{
if (lbd->lv) { printk(KERN_ERR "%s: lbatview leak\n", __func__); }
lbd->lv = lbatviewcache_get(lbd->lvc, lblk);
if (!lbd->lv) {
printk(KERN_ERR "%s: lbatviewcache_get failed\n", __func__);
return false;
}
lbd->lblk = lblk;
lbd->state = CACHE_STATE_UNCACHED;
return true;
}
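/*
* Copy len bytes at byte offset off out of the cached logical block.
*/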
void
lbd_data_read(struct lbd* lbd, u32 off, u32 len, u8* buf)
{
/* XXX: convert to BUG_ON */
if (off + len > PBLK_SIZE * lblk_per_pblk(lbd->params)) {
printk(KERN_ERR "%s: out of bounds\n", __func__);
return;
}
mutex_lock(&lbd->lock);
BUG_ON(lbd->state == CACHE_STATE_UNCACHED);
memcpy(buf, lbd->buf + off, len);
mutex_unlock(&lbd->lock);
}
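/*
* Copy len bytes at byte offset off into the cached logical block and
* mark it dirty.
*/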
void
lbd_data_write(struct lbd* lbd, u32 off, u32 len, const u8* buf)
{
/* XXX: convert to BUG_ON */
if (off + len > PBLK_SIZE * lblk_per_pblk(lbd->params)) {
printk(KERN_ERR "%s: out of bounds\n", __func__);
return;
}
mutex_lock(&lbd->lock);
BUG_ON(lbd->state == CACHE_STATE_UNCACHED);
memcpy(lbd->buf + off, buf, len);
lbd->state = CACHE_STATE_DIRTY;
mutex_unlock(&lbd->lock);
}
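/*
* A pool of lbd objects indexed by logical block number. The pool
* starts at a fixed size and doubles when every object is in use.
*/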
struct lbdcache
{
struct mutex lock;
struct cbd_params* params;
struct lbatviewcache* lvc;
unsigned int len;
struct lbd** cache;
};
size_t
lbdcache_size(void)
{
return sizeof(struct lbdcache);
}
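/*
* Grow the object array to len entries, constructing each new lbd.
* Existing entries are carried over. Returns false on allocation
* failure; entries already constructed remain in the array so that
* lbdcache_dtr() can release them.
*/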
static bool
lbdcache_realloc(struct lbdcache* lc, unsigned int len)
{
struct lbd** cache;
unsigned int n;
struct lbd* lbd;
cache = kcalloc(len, sizeof(struct lbd*), GFP_KERNEL);
if (!cache) {
return false;
}
n = 0;
if (lc->len) {
memcpy(cache, lc->cache, lc->len * sizeof(struct lbd*));
n = lc->len;
kfree(lc->cache);
}
lc->len = len;
lc->cache = cache;
while (n < len) {
lbd = kmalloc(sizeof(struct lbd), GFP_KERNEL);
if (!lbd) {
return false;
}
cache[n++] = lbd;
if (!lbd_ctr(lbd, lc->params, lc->lvc)) {
return false;
}
}
return true;
}
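/*
* Initialize the cache: construct the shared lbatview cache, then
* pre-allocate an initial pool of 1024 lbd objects.
*/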
bool
lbdcache_ctr(struct lbdcache* lc,
struct cbd_params* params)
{
memset(lc, 0, sizeof(struct lbdcache));
mutex_init(&lc->lock);
lc->params = params;
lc->lvc = kzalloc(lbatviewcache_size(), GFP_KERNEL);
if (!lc->lvc) {
return false;
}
if (!lbatviewcache_ctr(lc->lvc, params)) {
return false;
}
return lbdcache_realloc(lc, 1024);
}
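/*
* Tear down the cache: destruct every lbd (reporting any leaked
* references) and release the lbatview cache.
*/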
void
lbdcache_dtr(struct lbdcache* lc)
{
unsigned int n;
struct lbd* lbd;
for (n = 0; n < lc->len; ++n) {
lbd = lc->cache[n];
if (!lbd) {
continue;
}
lbd_dtr(lbd);
if (lbd->ref) {
printk(KERN_ERR "%s: lbd ref leak: n=%u ref=%u\n", __func__, n, lbd->ref);
}
kfree(lbd);
}
kfree(lc->cache);
lc->cache = NULL;
lc->len = 0;
lbatviewcache_dtr(lc->lvc);
kfree(lc->lvc);
lc->lvc = NULL;
lc->params = NULL;
}
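/*
* Take a reference on an lbd for lblk. Three passes over the pool:
* first an object already bound to lblk, then a never-used object,
* then any idle object not in the ERROR state. If all objects are
* busy, the pool is doubled. Note that an idle object is rebound via
* lbd_reset() even when it already holds lblk, so its cached data is
* re-read rather than reused.
*/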
struct lbd*
lbdcache_get(struct lbdcache* lc, u64 lblk)
{
unsigned int n;
struct lbd* lbd;
mutex_lock(&lc->lock);
for (n = 0; n < lc->len; ++n) {
lbd = lc->cache[n];
mutex_lock(&lbd->reflock);
if (lbd->lblk == lblk) {
if (lbd->ref == 0) {
goto found;
}
++lbd->ref;
mutex_unlock(&lbd->reflock);
goto out;
}
mutex_unlock(&lbd->reflock);
}
for (n = 0; n < lc->len; ++n) {
lbd = lc->cache[n];
mutex_lock(&lbd->reflock);
if (lbd->lblk == LBLK_NONE) {
goto found;
}
mutex_unlock(&lbd->reflock);
}
for (n = 0; n < lc->len; ++n) {
lbd = lc->cache[n];
mutex_lock(&lbd->reflock);
if (lbd->ref == 0 && lbd->state != CACHE_STATE_ERROR) {
goto found;
}
mutex_unlock(&lbd->reflock);
}
printk(KERN_INFO "%s: all objects in use, realloc...\n", __func__);
n = lc->len;
if (!lbdcache_realloc(lc, lc->len * 2)) {
printk(KERN_ERR "%s: realloc failed\n", __func__);
lbd = NULL;
goto out;
}
printk(KERN_INFO "%s: realloc done, using n=%u\n", __func__, n);
lbd = lc->cache[n];
mutex_lock(&lbd->reflock);
found:
if (!lbd_reset(lbd, lblk)) {
mutex_unlock(&lbd->reflock);
printk(KERN_ERR "%s: lbd_reset failed\n", __func__);
lbd = NULL;
goto out;
}
lbd->ref = 1;
mutex_unlock(&lbd->reflock);
out:
mutex_unlock(&lc->lock);
return lbd;
}
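/*
* Drop a reference on an lbd. The last put flushes any dirty data
* back to disk.
*/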
int
lbdcache_put(struct lbdcache* lc, struct lbd* lbd)
{
int ret = 0;
if (!lbd) {
return 0;
}
mutex_lock(&lc->lock);
mutex_lock(&lbd->reflock);
if (--lbd->ref == 0) {
ret = lbd_flush(lbd);
if (ret) {
printk(KERN_ERR "%s: lbd_flush failed\n", __func__);
}
}
mutex_unlock(&lbd->reflock);
mutex_unlock(&lc->lock);
return ret;
}