/*
 * Copyright (c) 2019 Tom Marshall
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

struct lbd {
	u64 lblk;

	struct mutex reflock;
	unsigned int ref;

	struct mutex lock;
	enum cache_state state;
	struct cbd_params* params;
	struct lbatviewcache* lvc;
	struct lbatview* lv;
	void* percpu;
	struct page* pages;
	u8* buf;
};

/*
 * Allocating lz4_wrkmem percpu:
 *
 * If the alloc is per-instance, it would need to be allocated in compress.c
 * and passed around. The easiest way to pass it around is likely to make it
 * part of a struct. We can't use struct compress because that is private.
 * So we would need to create a struct (say, compress_percpu).
 *
 * If the alloc is global, we can just declare it file-local. But it would
 * need to be the largest possible size, which means we probably don't want
 * to use alloc_percpu_gfp() directly, because 1mb chunks are probably not
 * that common. So suppose we allocate a percpu vector of page ptrs:
 *
 *   #define COMPRESS_MAX_INPUT_SIZE (1 << LBLK_SHIFT_MAX)
 *   #define COMPRESS_LZ4_BOUND LZ4_COMPRESSBOUND(COMPRESS_MAX_INPUT_SIZE)
 *   #define WRKMEM_PAGES DIV_ROUND_UP(COMPRESS_LZ4_BOUND, PAGE_SIZE)
 *   typedef struct page* lz4_wrkmem_pagev_t[WRKMEM_PAGES];
 *
 *   g_lz4_wrkmem = alloc_percpu_gfp(lz4_wrkmem_pagev_t, GFP_IO);
 *
 * That's not bad at all. But how do we alloc (and free) the actual pages?
 *
 *   pagev = get_cpu_var(g_lz4_wrkmem);
 *   put_cpu_var(pagev);
 *
 *   free_percpu(g_lz4_wrkmem);
 */
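/*
 * A minimal sketch of one possible answer to that question, assuming the
 * hypothetical g_lz4_wrkmem, lz4_wrkmem_pagev_t and WRKMEM_PAGES names from
 * the notes above (none of which exist in this file): allocate the percpu
 * page vector, then walk every possible cpu and populate it with individually
 * allocated pages, freeing along the same path on teardown.
 *
 *   static lz4_wrkmem_pagev_t __percpu* g_lz4_wrkmem;
 *
 *   static int lz4_wrkmem_init(void)
 *   {
 *       int cpu, i;
 *       struct page** pagev;
 *
 *       g_lz4_wrkmem = alloc_percpu_gfp(lz4_wrkmem_pagev_t, GFP_IO);
 *       if (!g_lz4_wrkmem)
 *           return -ENOMEM;
 *       for_each_possible_cpu(cpu) {
 *           pagev = (struct page**)per_cpu_ptr(g_lz4_wrkmem, cpu);
 *           for (i = 0; i < WRKMEM_PAGES; ++i) {
 *               pagev[i] = alloc_page(GFP_KERNEL);
 *               if (!pagev[i])
 *                   return -ENOMEM;    // caller unwinds via lz4_wrkmem_exit()
 *           }
 *       }
 *       return 0;
 *   }
 *
 *   static void lz4_wrkmem_exit(void)
 *   {
 *       int cpu, i;
 *       struct page** pagev;
 *
 *       for_each_possible_cpu(cpu) {
 *           pagev = (struct page**)per_cpu_ptr(g_lz4_wrkmem, cpu);
 *           for (i = 0; i < WRKMEM_PAGES; ++i)
 *               if (pagev[i])
 *                   __free_page(pagev[i]);
 *       }
 *       free_percpu(g_lz4_wrkmem);
 *   }
 *
 * (The code below takes a related but simpler approach: a percpu pointer to a
 * kzalloc'd lblk_compress_state, filled in by lbdcache_alloc_compress_state().)
 */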
static inline bool lblk_is_zeros(struct cbd_params* params, struct lbd* lbd)
{
#ifdef CBD_DETECT_ZERO_BLOCKS
	u32 off;
	u32 len = PBLK_SIZE * lblk_per_pblk(params);

	for (off = 0; off < len; ++off) {
		if (lbd->buf[off]) {
			return false;
		}
	}
	return true;
#else
	return false;
#endif
}

struct lblk_compress_state {
	struct page* pages;
	u8* buf;
#ifdef COMPRESS_HAVE_LZ4
	u8* lz4_workmem;
#endif
#ifdef COMPRESS_HAVE_ZLIB
	z_stream zlib_cstream;
	z_stream zlib_dstream;
#endif
};

static struct lblk_compress_state*
lblk_get_compress_state(void* percpu, const struct cbd_params* params, int cpu)
{
	struct lblk_compress_state** statep;

	statep = per_cpu_ptr(percpu, cpu);
	return *statep;
}

#ifdef COMPRESS_HAVE_LZ4
static size_t lblk_compress_lz4(struct lbd* lbd)
{
	int clen;
	int cpu;
	struct lblk_compress_state* state;

	cpu = get_cpu();
	state = lblk_get_compress_state(lbd->percpu, lbd->params, cpu);
	if (!state) {
		put_cpu();
		return 0;
	}
	clen = LZ4_compress_fast(lbd->buf, state->buf,
				 PBLK_SIZE * lblk_per_pblk(lbd->params),
				 PBLK_SIZE * (lblk_per_pblk(lbd->params) - 1),
				 lbd->params->compression,
				 state->lz4_workmem);
	if (clen <= 0) {
		put_cpu();
		return 0;
	}
	memcpy(lbd->buf, state->buf, clen);
	put_cpu();
	return (size_t)clen;
}

static bool lblk_decompress_lz4(struct lbd* lbd, u32 clen)
{
	int ret;
	int cpu;
	struct lblk_compress_state* state;
	u32 dlen = PBLK_SIZE * lblk_per_pblk(lbd->params);

	cpu = get_cpu();
	state = lblk_get_compress_state(lbd->percpu, lbd->params, cpu);
	if (!state) {
		put_cpu();
		return false;
	}
	ret = LZ4_decompress_safe(lbd->buf, state->buf, clen, dlen);
	if (ret != dlen) {
		put_cpu();
		return false;
	}
	memcpy(lbd->buf, state->buf, dlen);
	put_cpu();
	return true;
}
#endif

#ifdef COMPRESS_HAVE_ZLIB
static size_t lblk_compress_zlib(struct lbd* lbd)
{
	int ret;
	int cpu;
	struct lblk_compress_state* state;
	z_stream* stream;

	cpu = get_cpu();
	state = lblk_get_compress_state(lbd->percpu, lbd->params, cpu);
	if (!state) {
		put_cpu();
		return 0;
	}
	stream = &state->zlib_cstream;
	ret = zlib_deflateReset(stream);
	BUG_ON(ret != Z_OK);
	stream->next_in = lbd->buf;
	stream->avail_in = PBLK_SIZE * lblk_per_pblk(lbd->params);
	stream->next_out = state->buf;
	stream->avail_out = PBLK_SIZE * (lblk_per_pblk(lbd->params) - 1);
	ret = zlib_deflate(stream, Z_FINISH);
	if (ret != Z_STREAM_END) {
		put_cpu();
		return 0;
	}
	memcpy(lbd->buf, state->buf, stream->total_out);
	put_cpu();
	return stream->total_out;
}

static bool lblk_decompress_zlib(struct lbd* lbd, u32 clen)
{
	int ret;
	int cpu;
	struct lblk_compress_state* state;
	z_stream* stream;
	u32 dlen = PBLK_SIZE * lblk_per_pblk(lbd->params);

	cpu = get_cpu();
	state = lblk_get_compress_state(lbd->percpu, lbd->params, cpu);
	if (!state) {
		put_cpu();
		return false;
	}
	stream = &state->zlib_dstream;
	ret = zlib_inflateReset(stream);
	BUG_ON(ret != Z_OK);
	stream->next_in = lbd->buf;
	stream->avail_in = clen;
	stream->next_out = state->buf;
	stream->avail_out = dlen;
	ret = zlib_inflate(stream, Z_SYNC_FLUSH);
	/* See xxx */
	if (ret == Z_OK && !stream->avail_in && stream->avail_out) {
		/*
		 * All input was consumed but the stream has not signalled
		 * completion: feed a single dummy byte with Z_FINISH so that
		 * Z_STREAM_END and total_out can be validated below.
		 */
		u8 zerostuff = 0;

		stream->next_in = &zerostuff;
		stream->avail_in = 1;
		ret = zlib_inflate(stream, Z_FINISH);
	}
	if (ret != Z_STREAM_END || stream->total_out != dlen) {
		put_cpu();
		return false;
	}
	memcpy(lbd->buf, state->buf, dlen);
	put_cpu();
	return true;
}
#endif
/*
 * Compress lbd->buf using the configured algorithm.
 *
 * Returns the number of compressed bytes (now in lbd->buf), or 0 for failure.
 */
static size_t lblk_compress(struct lbd* lbd)
{
#ifdef COMPRESS_HAVE_LZ4
	if (lbd->params->algorithm == CBD_ALG_LZ4) {
		return lblk_compress_lz4(lbd);
	}
#endif
#ifdef COMPRESS_HAVE_ZLIB
	if (lbd->params->algorithm == CBD_ALG_ZLIB) {
		return lblk_compress_zlib(lbd);
	}
#endif
	return 0;
}

/*
 * Decompress the clen compressed bytes in lbd->buf in place.
 *
 * Returns true for success, false for failure.
 */
static bool lblk_decompress(struct lbd* lbd, u32 clen)
{
#ifdef COMPRESS_HAVE_LZ4
	if (lbd->params->algorithm == CBD_ALG_LZ4) {
		return lblk_decompress_lz4(lbd, clen);
	}
#endif
#ifdef COMPRESS_HAVE_ZLIB
	if (lbd->params->algorithm == CBD_ALG_ZLIB) {
		return lblk_decompress_zlib(lbd, clen);
	}
#endif
	return false;
}

static bool lbd_ctr(struct lbd* lbd,
		    struct cbd_params* params,
		    struct lbatviewcache* lvc,
		    void* percpu)
{
	memset(lbd, 0, sizeof(struct lbd));
	lbd->lblk = LBLK_NONE;
	mutex_init(&lbd->reflock);
	lbd->ref = 0;
	mutex_init(&lbd->lock);
	lbd->state = CACHE_STATE_UNCACHED;
	lbd->params = params;
	lbd->lvc = lvc;
	lbd->lv = NULL;
	lbd->percpu = percpu;
	lbd->pages = cbd_alloc_pages(lblk_per_pblk(lbd->params));
	if (!lbd->pages) {
		return false;
	}
	lbd->buf = page_address(lbd->pages);

	return true;
}

static void lbd_dtr(struct lbd* lbd)
{
	if (lbatviewcache_put(lbd->lvc, lbd->lv) != 0) {
		printk(KERN_ERR "%s: lbatviewcache_put failed\n", __func__);
	}
	lbd->buf = NULL;
	cbd_free_pages(lbd->pages, lblk_per_pblk(lbd->params));
	lbd->pages = NULL;
	lbd->percpu = NULL;
	lbd->lv = NULL;
	lbd->lvc = NULL;
}

static void lbd_flush_endio(struct bio* bio)
{
	struct lbd* lbd = bio->bi_private;
	int ret;

	cbd_free_page(bio->bi_io_vec[0].bv_page);
	ret = pblk_endio(bio);
	if (ret) {
		printk(KERN_ERR "%s: I/O failed\n", __func__);
		lbd->state = CACHE_STATE_ERROR;
	}
}

static int lbd_flush(struct lbd* lbd)
{
	int ret = 0;
	u32 c_len;
	u32 elem_len;
	u8* p;
	u32 n;
	u64 pblk;
	struct page* iopagev[1];

	mutex_lock(&lbd->lock);
	if (lbd->state != CACHE_STATE_DIRTY) {
		if (lbd->state == CACHE_STATE_ERROR) {
			ret = -EIO;
			goto out;
		}
		goto clean;
	}
	if (lblk_is_zeros(lbd->params, lbd)) {
		c_len = 0;
		elem_len = 0;
	} else {
		c_len = lblk_compress(lbd);
		if (c_len > 0) {
			size_t c_blkrem = c_len % PBLK_SIZE;

			if (c_blkrem) {
				/* Zero-pad the tail of the last pblk. */
				memset(lbd->buf + c_len, 0,
				       PBLK_SIZE - c_blkrem);
			}
			elem_len = c_len;
		} else {
			c_len = PBLK_SIZE * lblk_per_pblk(lbd->params);
			elem_len = CBD_UNCOMPRESSED;
		}
	}
	ret = lbatview_elem_realloc(lbd->lv, lbd->lblk, elem_len);
	if (ret) {
		goto out;
	}
	p = lbd->buf;
	for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
		pblk = lbatview_elem_pblk(lbd->lv, lbd->lblk, n);
		if (pblk == PBLK_NONE) {
			ret = -EIO;
			goto out;
		}
		iopagev[0] = cbd_alloc_page();
		if (!iopagev[0]) {
			printk(KERN_ERR "%s: out of memory\n", __func__);
			ret = -ENOMEM;
			goto out;
		}
		memcpy(page_address(iopagev[0]), p, PBLK_SIZE);
		pblk_write(lbd->params, pblk, 1, iopagev, lbd_flush_endio, lbd);
	}

clean:
	ret = lbatviewcache_put(lbd->lvc, lbd->lv);
	lbd->lv = NULL;
	if (ret) {
		lbd->state = CACHE_STATE_ERROR;
		goto out;
	}
	lbd->state = CACHE_STATE_CLEAN;

out:
	mutex_unlock(&lbd->lock);
	return ret;
}

static bool lbd_reset(struct lbd* lbd, u64 lblk)
{
	if (lbd->lv) {
		printk(KERN_ERR "%s: lbatview leak\n", __func__);
	}
	lbd->lv = lbatviewcache_get(lbd->lvc, lblk);
	if (!lbd->lv) {
		printk(KERN_ERR "%s: lbatviewcache_get failed\n", __func__);
		return false;
	}
	lbd->lblk = lblk;
	lbd->state = CACHE_STATE_UNCACHED;

	return true;
}
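/*
 * Element length encoding, as used by lbd_flush() above and lbd_read() below:
 *
 *   0                - the logical block is all zeros; no data pblks are
 *                      written and lbd_read() simply zero-fills the buffer.
 *   CBD_UNCOMPRESSED - the data did not compress; lblk_per_pblk() pblks hold
 *                      the raw data.
 *   anything else    - the compressed byte count; the data occupies
 *                      ceil(len / PBLK_SIZE) pblks, with the tail of the
 *                      last pblk zero-padded.
 */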
int lbd_read(struct lbd* lbd)
{
	int ret = 0;
	u32 c_len;
	u64 pblk;
	struct page* iopagev[1];

	mutex_lock(&lbd->lock);
	if (lbd->state != CACHE_STATE_UNCACHED) {
		goto out;
	}
	ret = lbatview_read(lbd->lv);
	if (ret) {
		goto out;
	}
	c_len = lbatview_elem_len(lbd->lv, lbd->lblk);
	if (c_len == 0) {
		memset(lbd->buf, 0, PBLK_SIZE * lblk_per_pblk(lbd->params));
	} else {
		bool is_compressed = true;
		u32 d_len = PBLK_SIZE * lblk_per_pblk(lbd->params);
		u32 n;
		u8* p;

		if (c_len == CBD_UNCOMPRESSED) {
			is_compressed = false;
			c_len = d_len;
		}
		p = lbd->buf;
		for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
			pblk = lbatview_elem_pblk(lbd->lv, lbd->lblk, n);
			if (pblk == PBLK_NONE) {
				ret = -EIO;
				goto out;
			}
			/* XXX: check pblk not in metadata? */
			iopagev[0] = virt_to_page(p);
			ret = pblk_read_wait(lbd->params, pblk, 1, iopagev);
			if (ret) {
				goto out;
			}
		}
		if (is_compressed) {
			if (!lblk_decompress(lbd, c_len)) {
				printk(KERN_ERR "%s: decompress failed\n",
				       __func__);
				ret = -EIO;
				goto out;
			}
		}
	}
	lbd->state = CACHE_STATE_CLEAN;

out:
	mutex_unlock(&lbd->lock);
	return ret;
}

void lbd_data_read(struct lbd* lbd, u32 off, u32 len, u8* buf)
{
	/* XXX: convert to BUG_ON */
	if (off + len > PBLK_SIZE * lblk_per_pblk(lbd->params)) {
		printk(KERN_ERR "%s: out of bounds\n", __func__);
		return;
	}
	mutex_lock(&lbd->lock);
	BUG_ON(lbd->state == CACHE_STATE_UNCACHED);
	memcpy(buf, lbd->buf + off, len);
	mutex_unlock(&lbd->lock);
}

void lbd_data_write(struct lbd* lbd, u32 off, u32 len, const u8* buf)
{
	/* XXX: convert to BUG_ON */
	if (off + len > PBLK_SIZE * lblk_per_pblk(lbd->params)) {
		printk(KERN_ERR "%s: out of bounds\n", __func__);
		return;
	}
	mutex_lock(&lbd->lock);
	BUG_ON(lbd->state == CACHE_STATE_UNCACHED);
	memcpy(lbd->buf + off, buf, len);
	lbd->state = CACHE_STATE_DIRTY;
	mutex_unlock(&lbd->lock);
}

struct lbdcache {
	struct mutex lock;
	struct cbd_params* params;
	void* percpu;
	struct lbatviewcache* lvc;
	unsigned int len;
	struct lbd** cache;
};

size_t lbdcache_size(void)
{
	return sizeof(struct lbdcache);
}

static bool lbdcache_realloc(struct lbdcache* lc, unsigned int len)
{
	struct lbd** cache;
	unsigned int n;
	struct lbd* lbd;

	cache = kzalloc(len * sizeof(struct lbd*), GFP_KERNEL);
	if (!cache) {
		return false;
	}
	n = 0;
	if (lc->len) {
		memcpy(cache, lc->cache, lc->len * sizeof(struct lbd*));
		n = lc->len;
		kfree(lc->cache);
	}
	lc->len = len;
	lc->cache = cache;
	while (n < len) {
		lbd = kmalloc(sizeof(struct lbd), GFP_KERNEL);
		if (!lbd) {
			return false;
		}
		cache[n++] = lbd;
		if (!lbd_ctr(lbd, lc->params, lc->lvc, lc->percpu)) {
			return false;
		}
	}

	return true;
}
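/*
 * Per-cpu compression state. lbdcache_ctr() below allocates one
 * lblk_compress_state for every online cpu; lblk_get_compress_state() then
 * resolves the current cpu's state via per_cpu_ptr(), so the compress and
 * decompress paths never allocate memory while handling I/O.
 */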
static bool lbdcache_alloc_compress_state(void* percpu,
					  const struct cbd_params* params,
					  int cpu)
{
	struct lblk_compress_state* state;
	struct lblk_compress_state** statep;
	size_t workmem_len;
#ifdef COMPRESS_HAVE_ZLIB
	int ret;
#endif

	state = kzalloc(sizeof(struct lblk_compress_state), GFP_NOWAIT);
	if (!state) {
		printk(KERN_ERR "%s: failed to alloc state\n", __func__);
		return false;
	}
	statep = per_cpu_ptr(percpu, cpu);
	*statep = state;
	state->pages = cbd_alloc_pages_nowait(lblk_per_pblk(params));
	if (!state->pages) {
		return false;
	}
	state->buf = page_address(state->pages);
#ifdef COMPRESS_HAVE_LZ4
	workmem_len = LZ4_compressBound(PBLK_SIZE * lblk_per_pblk(params));
	state->lz4_workmem = kzalloc(workmem_len, GFP_NOWAIT);
	if (!state->lz4_workmem) {
		return false;
	}
#endif
#ifdef COMPRESS_HAVE_ZLIB
	workmem_len = zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL);
	state->zlib_cstream.workspace = kzalloc(workmem_len, GFP_NOWAIT);
	if (!state->zlib_cstream.workspace) {
		return false;
	}
	ret = zlib_deflateInit2(&state->zlib_cstream, params->compression,
				Z_DEFLATED, MAX_WBITS, DEF_MEM_LEVEL,
				Z_DEFAULT_STRATEGY);
	BUG_ON(ret != Z_OK);
	workmem_len = zlib_inflate_workspacesize();
	state->zlib_dstream.workspace = kzalloc(workmem_len, GFP_NOWAIT);
	if (!state->zlib_dstream.workspace) {
		return false;
	}
	ret = zlib_inflateInit2(&state->zlib_dstream, DEF_WBITS);
	BUG_ON(ret != Z_OK);
#endif

	return true;
}

static void lbdcache_free_compress_state(void* percpu,
					 const struct cbd_params* params,
					 int cpu)
{
	struct lblk_compress_state** statep;
	struct lblk_compress_state* state;

	statep = per_cpu_ptr(percpu, cpu);
	state = *statep;
	if (!state) {
		return;
	}
#ifdef COMPRESS_HAVE_ZLIB
	kfree(state->zlib_dstream.workspace);
	kfree(state->zlib_cstream.workspace);
#endif
#ifdef COMPRESS_HAVE_LZ4
	kfree(state->lz4_workmem);
#endif
	cbd_free_pages(state->pages, lblk_per_pblk(params));
	kfree(state);
}

bool lbdcache_ctr(struct lbdcache* lc, struct cbd_params* params)
{
	int cpu;

	memset(lc, 0, sizeof(struct lbdcache));
	mutex_init(&lc->lock);
	lc->params = params;
	lc->percpu = alloc_percpu(void*);
	if (!lc->percpu) {
		return false;
	}
	for (cpu = 0; cpu < num_online_cpus(); ++cpu) {
		if (!lbdcache_alloc_compress_state(lc->percpu, params, cpu)) {
			return false;
		}
	}
	lc->lvc = kzalloc(lbatviewcache_size(), GFP_KERNEL);
	if (!lc->lvc) {
		return false;
	}
	if (!lbatviewcache_ctr(lc->lvc, params)) {
		return false;
	}

	return lbdcache_realloc(lc, 1024);
}

void lbdcache_dtr(struct lbdcache* lc)
{
	unsigned int n;
	struct lbd* lbd;
	int cpu;

	for (n = 0; n < lc->len; ++n) {
		lbd = lc->cache[n];
		if (!lbd) {
			continue;
		}
		lbd_dtr(lbd);
		if (lbd->ref) {
			printk(KERN_ERR "%s: lbd ref leak: n=%u ref=%u\n",
			       __func__, n, lbd->ref);
		}
		kfree(lbd);
	}
	kfree(lc->cache);
	lc->cache = NULL;
	lc->len = 0;
	lbatviewcache_dtr(lc->lvc);
	kfree(lc->lvc);
	lc->lvc = NULL;
	for (cpu = 0; cpu < num_online_cpus(); ++cpu) {
		lbdcache_free_compress_state(lc->percpu, lc->params, cpu);
	}
	free_percpu(lc->percpu);
	lc->percpu = NULL;
	lc->params = NULL;
}

struct lbd* lbdcache_get(struct lbdcache* lc, u64 lblk)
{
	unsigned int n;
	struct lbd* lbd;

	mutex_lock(&lc->lock);
	for (n = 0; n < lc->len; ++n) {
		lbd = lc->cache[n];
		mutex_lock(&lbd->reflock);
		if (lbd->lblk == lblk) {
			if (lbd->ref == 0) {
				goto found;
			}
			++lbd->ref;
			mutex_unlock(&lbd->reflock);
			goto out;
		}
		mutex_unlock(&lbd->reflock);
	}
	for (n = 0; n < lc->len; ++n) {
		lbd = lc->cache[n];
		mutex_lock(&lbd->reflock);
		if (lbd->lblk == LBLK_NONE) {
			goto found;
		}
		mutex_unlock(&lbd->reflock);
	}
	for (n = 0; n < lc->len; ++n) {
		lbd = lc->cache[n];
		mutex_lock(&lbd->reflock);
		if (lbd->ref == 0 && lbd->state != CACHE_STATE_ERROR) {
			goto found;
		}
		mutex_unlock(&lbd->reflock);
	}
	printk(KERN_INFO "%s: all objects in use, realloc...\n", __func__);
	n = lc->len;
	if (!lbdcache_realloc(lc, lc->len * 2)) {
		printk(KERN_ERR "%s: realloc failed\n", __func__);
		lbd = NULL;
		goto out;
	}
	printk(KERN_INFO "%s: realloc done, using n=%u\n", __func__, n);
	lbd = lc->cache[n];
	mutex_lock(&lbd->reflock);

found:
	if (!lbd_reset(lbd, lblk)) {
		mutex_unlock(&lbd->reflock);
		printk(KERN_ERR "%s: lbd_reset failed\n", __func__);
		lbd = NULL;
		goto out;
	}
	lbd->ref = 1;
	mutex_unlock(&lbd->reflock);

out:
	mutex_unlock(&lc->lock);
	return lbd;
}

int lbdcache_put(struct lbdcache* lc, struct lbd* lbd)
{
	int ret = 0;

	if (!lbd) {
		return 0;
	}
	mutex_lock(&lc->lock);
	mutex_lock(&lbd->reflock);
	if (--lbd->ref == 0) {
		ret = lbd_flush(lbd);
		if (ret) {
			printk(KERN_ERR "%s: lbd_flush failed\n", __func__);
		}
	}
	mutex_unlock(&lbd->reflock);
	mutex_unlock(&lc->lock);

	return ret;
}
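/*
 * Typical caller-side usage, a minimal sketch. The names `lc`, `lblk`, `off`,
 * `len` and `data` are illustrative only; `lc` is assumed to have been set up
 * with lbdcache_ctr().
 *
 *   struct lbd* lbd;
 *   int ret, ret2;
 *
 *   lbd = lbdcache_get(lc, lblk);
 *   if (!lbd)
 *       return -ENOMEM;
 *   ret = lbd_read(lbd);                      // no-op if already cached
 *   if (!ret)
 *       lbd_data_write(lbd, off, len, data);  // marks the lbd dirty
 *   ret2 = lbdcache_put(lc, lbd);             // final put flushes dirty data
 *   return ret ? ret : ret2;
 */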