/*
 * Copyright (c) 2019 Tom Marshall
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

/*
 * The original include lines lost their header names; the list below is
 * reconstructed from what the code uses (mutexes, slab allocations, pages,
 * bios, and the kernel LZ4 API), plus an assumed project-local header that
 * declares the cbd_, lbatview, and pblk helpers used below.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/mm.h>
#include <linux/bio.h>
#include <linux/lz4.h>

#include "dm-compress.h"	/* assumed name for the project-local header */

struct lbd {
	u64 lblk;

	struct mutex reflock;
	unsigned int ref;

	struct mutex lock;
	enum cache_state state;

	struct cbd_params* params;
	struct lbatviewcache* lvc;
	struct lbatview* lv;

	u8* lz4_wrkmem;
	struct page* lz4_cpages;
	u8* lz4_cbuf;
	struct page* pages;
	u8* buf;
};

/*
 * Allocating lz4_wrkmem percpu:
 *
 * If the alloc is per-instance, it would need to be allocated in compress.c
 * and passed around. The easiest way to pass it around is likely to make it
 * part of a struct. We can't use struct compress because that is private.
 * So we would need to create a struct (say, compress_percpu).
 *
 * If the alloc is global, we can just declare it file-local. But it would
 * need to be the largest possible size, which means we probably don't want
 * to use alloc_percpu_gfp() directly, because 1mb chunks are probably not
 * that common. So suppose we allocate a percpu vector of page ptrs:
 *
 *   #define COMPRESS_MAX_INPUT_SIZE (1 << LBLK_SHIFT_MAX)
 *   #define COMPRESS_LZ4_BOUND LZ4_COMPRESSBOUND(COMPRESS_MAX_INPUT_SIZE)
 *   #define WRKMEM_PAGES DIV_ROUND_UP(COMPRESS_LZ4_BOUND, PAGE_SIZE)
 *   typedef struct page* lz4_wrkmem_pagev_t[WRKMEM_PAGES];
 *
 *   g_lz4_wrkmem = alloc_percpu_gfp(lz4_wrkmem_pagev_t, GFP_IO);
 *
 * That's not bad at all. But how do we alloc (and free) the actual pages?
 *
 *   pagev = get_cpu_var(g_lz4_wrkmem);
 *   put_cpu_var(pagev);
 *
 *   free_percpu(g_lz4_wrkmem);
 */
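/*
 * Illustrative sketch only (not built): one way to answer the question
 * above about allocating and freeing the actual pages behind the percpu
 * vector. The macro and typedef names are taken from the comment; the
 * lz4_wrkmem_init()/lz4_wrkmem_exit() helpers are assumptions, not part of
 * the current code, and would also need <linux/percpu.h>.
 */
#if 0
static lz4_wrkmem_pagev_t __percpu* g_lz4_wrkmem;

static int
lz4_wrkmem_init(void)
{
	int cpu;
	unsigned int n;

	g_lz4_wrkmem = alloc_percpu_gfp(lz4_wrkmem_pagev_t, GFP_KERNEL);
	if (!g_lz4_wrkmem) {
		return -ENOMEM;
	}
	/* Populate each cpu's page vector up front, at init time. */
	for_each_possible_cpu(cpu) {
		struct page** pagev = *per_cpu_ptr(g_lz4_wrkmem, cpu);
		for (n = 0; n < WRKMEM_PAGES; ++n) {
			pagev[n] = alloc_page(GFP_KERNEL);
			if (!pagev[n]) {
				/* Caller would unwind via lz4_wrkmem_exit(). */
				return -ENOMEM;
			}
		}
	}
	return 0;
}

static void
lz4_wrkmem_exit(void)
{
	int cpu;
	unsigned int n;

	for_each_possible_cpu(cpu) {
		struct page** pagev = *per_cpu_ptr(g_lz4_wrkmem, cpu);
		for (n = 0; n < WRKMEM_PAGES; ++n) {
			if (pagev[n]) {
				__free_page(pagev[n]);
			}
		}
	}
	free_percpu(g_lz4_wrkmem);
	g_lz4_wrkmem = NULL;
}
#endif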
static inline bool lblk_is_zeros(struct cbd_params* params, struct lbd* lbd)
{
#ifdef CBD_DETECT_ZERO_BLOCKS
	u32 off;
	u32 len = PBLK_SIZE * lblk_per_pblk(params);

	for (off = 0; off < len; ++off) {
		if (lbd->buf[off]) {
			return false;
		}
	}
	return true;
#else
	return false;
#endif
}

/*
 * Compress lbd->buf into lbd->lz4_cbuf.
 *
 * Returns number of bytes in cbuf, or 0 for failure.
 */
static size_t lblk_compress(struct lbd* lbd)
{
	int ret;
	void *dbuf = lbd->buf;
	u32 dlen = PBLK_SIZE * lblk_per_pblk(lbd->params);
	void *cbuf = lbd->lz4_cbuf;
	u32 clen = PBLK_SIZE * lblk_per_pblk(lbd->params);

	ret = LZ4_compress_default(dbuf, cbuf, dlen, clen, lbd->lz4_wrkmem);
	if (ret <= 0) {
		return 0;
	}

	return (size_t)ret;
}

/*
 * Decompress lbd->lz4_cbuf of size clen into lbd->buf.
 *
 * Returns 0 for success, <0 for failure.
 */
static int lblk_decompress(struct lbd* lbd, u32 clen)
{
	int ret;
	void *cbuf = lbd->lz4_cbuf;
	void *dbuf = lbd->buf;
	u32 dlen = PBLK_SIZE * lblk_per_pblk(lbd->params);

	ret = LZ4_decompress_safe(cbuf, dbuf, clen, dlen);
	if (ret != dlen) {
		printk(KERN_ERR "%s: failed, ret=%d (expected %u)\n",
		       __func__, ret, (unsigned int)dlen);
		return -1;
	}

	return 0;
}

bool lbd_ctr(struct lbd* lbd, struct cbd_params* params, struct lbatviewcache* lvc)
{
	memset(lbd, 0, sizeof(struct lbd));
	lbd->lblk = LBLK_NONE;
	mutex_init(&lbd->reflock);
	lbd->ref = 0;
	mutex_init(&lbd->lock);
	lbd->state = CACHE_STATE_UNCACHED;
	lbd->params = params;
	lbd->lvc = lvc;
	lbd->lv = NULL;
	/* LZ4_compress_default() requires a workspace of LZ4_MEM_COMPRESS bytes. */
	lbd->lz4_wrkmem = kmalloc(LZ4_MEM_COMPRESS, GFP_KERNEL);
	if (!lbd->lz4_wrkmem) {
		return false;
	}
	lbd->lz4_cpages = cbd_alloc_pages(lblk_per_pblk(lbd->params));
	if (!lbd->lz4_cpages) {
		return false;
	}
	lbd->lz4_cbuf = page_address(lbd->lz4_cpages);
	lbd->pages = cbd_alloc_pages(lblk_per_pblk(lbd->params));
	if (!lbd->pages) {
		return false;
	}
	lbd->buf = page_address(lbd->pages);

	return true;
}

void lbd_dtr(struct lbd* lbd)
{
	if (lbatviewcache_put(lbd->lvc, lbd->lv) != 0) {
		printk(KERN_ERR "%s: lbatviewcache_put failed\n", __func__);
	}
	lbd->lv = NULL;
	cbd_free_pages(lbd->pages, lblk_per_pblk(lbd->params));
	lbd->pages = NULL;
	lbd->buf = NULL;
	cbd_free_pages(lbd->lz4_cpages, lblk_per_pblk(lbd->params));
	lbd->lz4_cpages = NULL;
	lbd->lz4_cbuf = NULL;
	kfree(lbd->lz4_wrkmem);
	lbd->lz4_wrkmem = NULL;
}

static void lbd_flush_endio(struct bio* bio)
{
	struct lbd* lbd = bio->bi_private;
	int ret;

	cbd_free_page(bio->bi_io_vec[0].bv_page);
	ret = pblk_endio(bio);
	if (ret) {
		printk(KERN_ERR "%s: I/O failed\n", __func__);
		lbd->state = CACHE_STATE_ERROR;
	}
}

int lbd_flush(struct lbd* lbd)
{
	int ret = 0;
	u32 c_len;
	u32 elem_len;
	u8* p;
	u32 n;
	u64 pblk;
	struct page* iopagev[1];

	mutex_lock(&lbd->lock);
	if (lbd->state != CACHE_STATE_DIRTY) {
		if (lbd->state == CACHE_STATE_ERROR) {
			ret = -EIO;
			goto out;
		}
		goto clean;
	}
	if (lblk_is_zeros(lbd->params, lbd)) {
		c_len = 0;
		elem_len = 0;
		p = NULL;
	}
	else {
		c_len = lblk_compress(lbd);
		if (c_len > 0) {
			size_t c_blkrem = c_len % PBLK_SIZE;

			if (c_blkrem) {
				/* Zero-pad the tail of the last compressed pblk. */
				memset(lbd->lz4_cbuf + c_len, 0, PBLK_SIZE - c_blkrem);
			}
			elem_len = c_len;
			p = lbd->lz4_cbuf;
		}
		else {
			c_len = PBLK_SIZE * lblk_per_pblk(lbd->params);
			elem_len = CBD_UNCOMPRESSED;
			p = lbd->buf;
		}
	}
	ret = lbatview_elem_realloc(lbd->lv, lbd->lblk, elem_len);
	if (ret) {
		goto out;
	}
	for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
		pblk = lbatview_elem_pblk(lbd->lv, lbd->lblk, n);
		if (pblk == PBLK_NONE) {
			ret = -EIO;
			goto out;
		}
		iopagev[0] = cbd_alloc_page();
		if (!iopagev[0]) {
			printk(KERN_ERR "%s: out of memory\n", __func__);
			ret = -ENOMEM;
			goto out;
		}
		memcpy(page_address(iopagev[0]), p, PBLK_SIZE);
		pblk_write(lbd->params, pblk, 1, iopagev, lbd_flush_endio, lbd);
	}

clean:
	ret = lbatviewcache_put(lbd->lvc, lbd->lv);
	lbd->lv = NULL;
	if (ret) {
		lbd->state = CACHE_STATE_ERROR;
		goto out;
	}
	lbd->state = CACHE_STATE_CLEAN;

out:
	mutex_unlock(&lbd->lock);
	return ret;
}
int lbd_read(struct lbd* lbd)
{
	int ret = 0;
	u32 c_len;
	u64 pblk;
	struct page* iopagev[1];

	mutex_lock(&lbd->lock);
	if (lbd->state != CACHE_STATE_UNCACHED) {
		goto out;
	}
	ret = lbatview_read(lbd->lv);
	if (ret) {
		goto out;
	}
	c_len = lbatview_elem_len(lbd->lv, lbd->lblk);
	if (c_len == 0) {
		memset(lbd->buf, 0, PBLK_SIZE * lblk_per_pblk(lbd->params));
	}
	else {
		bool is_compressed = true;
		u32 d_len = PBLK_SIZE * lblk_per_pblk(lbd->params);
		u32 n;
		u8* p;

		if (c_len == CBD_UNCOMPRESSED) {
			is_compressed = false;
			c_len = d_len;
		}
		p = lbd->lz4_cbuf;
		for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
			pblk = lbatview_elem_pblk(lbd->lv, lbd->lblk, n);
			if (pblk == PBLK_NONE) {
				ret = -EIO;
				goto out;
			}
			/* XXX: check pblk not in metadata? */
			iopagev[0] = virt_to_page(p);
			ret = pblk_read_wait(lbd->params, pblk, 1, iopagev);
			if (ret) {
				goto out;
			}
		}
		if (is_compressed) {
			if (lblk_decompress(lbd, c_len) != 0) {
				printk(KERN_ERR "%s: decompress failed\n", __func__);
				ret = -EIO;
				goto out;
			}
		}
		else {
			memcpy(lbd->buf, lbd->lz4_cbuf, d_len);
		}
	}
	lbd->state = CACHE_STATE_CLEAN;

out:
	mutex_unlock(&lbd->lock);
	return ret;
}

bool lbd_reset(struct lbd* lbd, u64 lblk)
{
	if (lbd->lv) {
		printk(KERN_ERR "%s: lbatview leak\n", __func__);
	}
	lbd->lv = lbatviewcache_get(lbd->lvc, lblk);
	if (!lbd->lv) {
		printk(KERN_ERR "%s: lbatviewcache_get failed\n", __func__);
		return false;
	}
	lbd->lblk = lblk;
	lbd->state = CACHE_STATE_UNCACHED;

	return true;
}

void lbd_data_read(struct lbd* lbd, u32 off, u32 len, u8* buf)
{
	/* XXX: convert to BUG_ON */
	if (off + len > PBLK_SIZE * lblk_per_pblk(lbd->params)) {
		printk(KERN_ERR "%s: out of bounds\n", __func__);
		return;
	}
	mutex_lock(&lbd->lock);
	BUG_ON(lbd->state == CACHE_STATE_UNCACHED);
	memcpy(buf, lbd->buf + off, len);
	mutex_unlock(&lbd->lock);
}

void lbd_data_write(struct lbd* lbd, u32 off, u32 len, const u8* buf)
{
	/* XXX: convert to BUG_ON */
	if (off + len > PBLK_SIZE * lblk_per_pblk(lbd->params)) {
		printk(KERN_ERR "%s: out of bounds\n", __func__);
		return;
	}
	mutex_lock(&lbd->lock);
	BUG_ON(lbd->state == CACHE_STATE_UNCACHED);
	memcpy(lbd->buf + off, buf, len);
	lbd->state = CACHE_STATE_DIRTY;
	mutex_unlock(&lbd->lock);
}

struct lbdcache {
	struct mutex lock;
	struct cbd_params* params;
	struct lbatviewcache* lvc;
	unsigned int len;
	struct lbd** cache;
};

size_t lbdcache_size(void)
{
	return sizeof(struct lbdcache);
}

static bool lbdcache_realloc(struct lbdcache* lc, unsigned int len)
{
	struct lbd** cache;
	unsigned int n;
	struct lbd* lbd;

	cache = kzalloc(len * sizeof(struct lbd*), GFP_KERNEL);
	if (!cache) {
		return false;
	}
	n = 0;
	if (lc->len) {
		memcpy(cache, lc->cache, lc->len * sizeof(struct lbd*));
		n = lc->len;
		kfree(lc->cache);
	}
	lc->len = len;
	lc->cache = cache;
	while (n < len) {
		lbd = kmalloc(sizeof(struct lbd), GFP_KERNEL);
		if (!lbd) {
			return false;
		}
		cache[n++] = lbd;
		if (!lbd_ctr(lbd, lc->params, lc->lvc)) {
			return false;
		}
	}

	return true;
}

bool lbdcache_ctr(struct lbdcache* lc, struct cbd_params* params)
{
	memset(lc, 0, sizeof(struct lbdcache));
	mutex_init(&lc->lock);
	lc->params = params;
	lc->lvc = kzalloc(lbatviewcache_size(), GFP_KERNEL);
	if (!lc->lvc) {
		return false;
	}
	if (!lbatviewcache_ctr(lc->lvc, params)) {
		return false;
	}

	return lbdcache_realloc(lc, 1024);
}

void lbdcache_dtr(struct lbdcache* lc)
{
	unsigned int n;
	struct lbd* lbd;

	for (n = 0; n < lc->len; ++n) {
		lbd = lc->cache[n];
		if (!lbd) {
			continue;
		}
		lbd_dtr(lbd);
		if (lbd->ref) {
			printk(KERN_ERR "%s: lbd ref leak: n=%u ref=%u\n",
			       __func__, n, lbd->ref);
		}
		kfree(lbd);
	}
	kfree(lc->cache);
	lc->cache = NULL;
	lc->len = 0;
	lbatviewcache_dtr(lc->lvc);
	kfree(lc->lvc);
	lc->lvc = NULL;
	lc->params = NULL;
}
struct lbd* lbdcache_get(struct lbdcache* lc, u64 lblk)
{
	unsigned int n;
	struct lbd* lbd;

	mutex_lock(&lc->lock);
	/* First pass: look for an object already holding this lblk. */
	for (n = 0; n < lc->len; ++n) {
		lbd = lc->cache[n];
		mutex_lock(&lbd->reflock);
		if (lbd->lblk == lblk) {
			if (lbd->ref == 0) {
				goto found;
			}
			++lbd->ref;
			mutex_unlock(&lbd->reflock);
			goto out;
		}
		mutex_unlock(&lbd->reflock);
	}
	/* Second pass: look for a never-used object. */
	for (n = 0; n < lc->len; ++n) {
		lbd = lc->cache[n];
		mutex_lock(&lbd->reflock);
		if (lbd->lblk == LBLK_NONE) {
			goto found;
		}
		mutex_unlock(&lbd->reflock);
	}
	/* Third pass: look for any unreferenced, non-errored object. */
	for (n = 0; n < lc->len; ++n) {
		lbd = lc->cache[n];
		mutex_lock(&lbd->reflock);
		if (lbd->ref == 0 && lbd->state != CACHE_STATE_ERROR) {
			goto found;
		}
		mutex_unlock(&lbd->reflock);
	}
	printk(KERN_INFO "%s: all objects in use, realloc...\n", __func__);
	n = lc->len;
	if (!lbdcache_realloc(lc, lc->len * 2)) {
		printk(KERN_ERR "%s: realloc failed\n", __func__);
		lbd = NULL;
		goto out;
	}
	printk(KERN_INFO "%s: realloc done, using n=%u\n", __func__, n);
	lbd = lc->cache[n];
	mutex_lock(&lbd->reflock);

found:
	if (!lbd_reset(lbd, lblk)) {
		mutex_unlock(&lbd->reflock);
		printk(KERN_ERR "%s: lbd_reset failed\n", __func__);
		lbd = NULL;
		goto out;
	}
	lbd->ref = 1;
	mutex_unlock(&lbd->reflock);

out:
	mutex_unlock(&lc->lock);
	return lbd;
}

int lbdcache_put(struct lbdcache* lc, struct lbd* lbd)
{
	int ret = 0;

	if (!lbd) {
		return 0;
	}
	mutex_lock(&lc->lock);
	mutex_lock(&lbd->reflock);
	if (--lbd->ref == 0) {
		ret = lbd_flush(lbd);
		if (ret) {
			printk(KERN_ERR "%s: lbd_flush failed\n", __func__);
		}
	}
	mutex_unlock(&lbd->reflock);
	mutex_unlock(&lc->lock);
	return ret;
}
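/*
 * Illustrative usage sketch only (not built): how a caller is expected to
 * drive the cache above. lbdcache_get() returns a referenced lbd for the
 * logical block, lbd_read() populates it, lbd_data_write() modifies the
 * uncompressed data and marks it dirty, and lbdcache_put() drops the
 * reference, flushing (and recompressing) on the last put. The function
 * name and error handling below are assumptions for illustration, not part
 * of the current code.
 */
#if 0
static int example_rmw(struct lbdcache* lc, u64 lblk,
		       u32 off, const u8* data, u32 len)
{
	struct lbd* lbd;
	int ret;

	lbd = lbdcache_get(lc, lblk);
	if (!lbd) {
		return -ENOMEM;
	}
	ret = lbd_read(lbd);			/* no-op if already cached */
	if (ret) {
		lbdcache_put(lc, lbd);
		return ret;
	}
	lbd_data_write(lbd, off, len, data);	/* marks the lbd dirty */
	return lbdcache_put(lc, lbd);		/* last put flushes */
}
#endif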