diff --git a/README b/README index af07ff5..ab79a42 100644 --- a/README +++ b/README @@ -14,11 +14,14 @@ Block device layout: - u16 version_minor /* Offset 8: parameters */ - u16 flags - - u8 algorithm (1=lz4, 2=zlib, ...) [1] - - u8 compression (1..9) [1] + - u8 compression + hi nybble: algorithm (1=lz4, 2=zlib, ...) [1] + lo nybble: level (1..9) [1] - u8 pblk_shift (0..3) [3 = 4kb] - u8 lblk_shift (1..10) [4 = 64kb (*)] - - u16 pbat_len [1] + - u8 lba_elem_pblk_bytes (2, 4, 6) + - u8 pbat_shift [0] + - u8 pad - u32 nr_zones - u32 lblk_per_zone - byte[40] reserved diff --git a/cbd/cbd.c b/cbd/cbd.c index 72517f2..40c6ed3 100644 --- a/cbd/cbd.c +++ b/cbd/cbd.c @@ -131,7 +131,7 @@ usage(void) "\n"); fprintf(stderr, "Commands:\n" " format [opts] Create (format) a compressed device\n" - " -P --pbat-len Physical block allocation table length [1]\n" + " -P --pbat-size Physical block allocation table size [1]\n" " -S --pysical-size Physical size [device size]\n" " -c --compress-factor Compression factor [2.0]\n" " -l --logical-blksize Logical block size\n" @@ -169,7 +169,7 @@ do_format(int argc, char** argv) { static const char short_opts[] = "P:S:c:l:p:s:z:Z:"; static const struct option long_opts[] = { - { "pbat-len", required_argument, NULL, 'P' }, + { "pbat-size", required_argument, NULL, 'P' }, { "physical-size", required_argument, NULL, 'S' }, { "compress-factor", required_argument, NULL, 'c' }, { "logical-blksize", required_argument, NULL, 'l' }, @@ -186,12 +186,13 @@ do_format(int argc, char** argv) uint64_t lsize = 0; uint pblksize = PAGE_SIZE; uint lblksize = 16 * PAGE_SIZE; - uint16_t pbatlen = 1; + uint pbatsize = 1; enum cbd_alg alg = CBD_ALG_LZ4; uint level = 1; uint8_t pshift; uint8_t lshift; + uint8_t pbatshift; const char* dev; @@ -204,7 +205,7 @@ do_format(int argc, char** argv) if (optval < 1) { error("Size \"%s\" is not a valid pbat len\n", optarg); } - pbatlen = optval; + pbatsize = optval; break; case 'S': if (!parse_numeric_arg(optarg, &optval)) { @@ -277,12 +278,16 @@ do_format(int argc, char** argv) if (lshift < LBLK_SHIFT_MIN || lshift > LBLK_SHIFT_MAX) { error("Invalid logical block size %u\n", lblksize); } + pbatshift = get_shift(pbatsize, 1); + if (pbatshift < PBAT_SHIFT_MIN || pbatshift > PBAT_SHIFT_MAX) { + error("Invalid pbat len %u\n", pbatsize); + } if (argc - optind != 1) { usage(); } dev = argv[optind++]; - cbd_format(dev, pshift, lshift, pbatlen, alg, level, psize, lsize); + cbd_format(dev, alg, level, pshift, lshift, pbatshift, psize, lsize); return 0; } diff --git a/dm-compress/compress.c b/dm-compress/compress.c index 8e4b83c..f87ad4c 100644 --- a/dm-compress/compress.c +++ b/dm-compress/compress.c @@ -78,25 +78,23 @@ static int compress_read_header(struct compress* c) { int ret = 0; - struct page* pblkpage; - u8 *pblkbuf; - struct page* iopagev[1]; + struct page* page; + u8 *buf; struct cbd_header header; - pblkpage = cbd_alloc_page(); - if (!pblkpage) { + page = cbd_alloc_page(); + if (!page) { return -ENOMEM; } - pblkbuf = page_address(pblkpage); - iopagev[0] = pblkpage; + buf = page_address(page); - ret = pblk_read_wait(&c->kparams, 0, 1, iopagev); + ret = pblk_read_wait(&c->kparams, 0, 1, page); if (ret) { printk(KERN_ERR "%s: failed to read header\n", __func__); goto out; } memset(&header, 0, sizeof(header)); - cbd_header_get(pblkbuf, &header); + cbd_header_get(buf, &header); if (memcmp(header.magic, CBD_MAGIC, sizeof(header.magic)) != 0) { printk(KERN_ERR "%s: bad magic\n", __func__); @@ -158,13 +156,23 @@ compress_read_header(struct compress* c) ret = -EINVAL; goto out; } + if (pbat_len(&header.params) * pblk_size(&header.params) > PAGE_SIZE) { + printk(KERN_ERR "%s: pbat size too large\n", __func__); + ret = -EINVAL; + goto out; + } + if (lba_len(&header.params) > pblk_size(&header.params)) { + printk(KERN_ERR "%s: lba elem size too large\n", __func__); + ret = -EINVAL; + goto out; + } printk(KERN_INFO "%s: parameters...\n", __func__); printk(KERN_INFO " compression=0x%02x\n", (unsigned int)header.params.compression); printk(KERN_INFO " pblk_shift=%hu\n", (unsigned short)header.params.pblk_shift); printk(KERN_INFO " lblk_shift=%hu\n", (unsigned short)header.params.lblk_shift); printk(KERN_INFO " lba_elem_pblk_bytes=%hu\n", (unsigned short)header.params.lba_elem_pblk_bytes); - printk(KERN_INFO " pbat_len=%hu\n", (unsigned short)header.params.pbat_len); + printk(KERN_INFO " pbat_shift=%hu\n", (unsigned short)header.params.pbat_shift); printk(KERN_INFO " nr_zones=%u\n", (unsigned int)header.params.nr_zones); printk(KERN_INFO " lblk_per_zone=%u\n", (unsigned int)header.params.lblk_per_zone); printk(KERN_INFO "%s: stats...\n", __func__); @@ -175,7 +183,7 @@ compress_read_header(struct compress* c) memcpy(&c->kstats.stats, &header.stats, sizeof(header.stats)); out: - cbd_free_page(pblkpage); + cbd_free_page(page); return ret; } @@ -183,29 +191,27 @@ static int compress_write_header(struct compress* c) { int ret = 0; - struct page* pblkpage; - u8* pblkbuf; + struct page* page; + u8* buf; struct cbd_header header; - struct page* iopagev[1]; - pblkpage = cbd_alloc_page(); - if (!pblkpage) { + page = cbd_alloc_page(); + if (!page) { return -ENOMEM; } - pblkbuf = page_address(pblkpage); + buf = page_address(page); memset(&header, 0, sizeof(header)); memcpy(header.magic, CBD_MAGIC, sizeof(header.magic)); header.version_major = CBD_VERSION_MAJOR; header.version_minor = CBD_VERSION_MINOR; memcpy(&header.params, &c->kparams.params, sizeof(header.params)); memcpy(&header.stats, &c->kstats.stats, sizeof(header.stats)); - cbd_header_put(pblkbuf, &header); - iopagev[0] = pblkpage; - ret = pblk_write_wait(&c->kparams, 0, 1, iopagev); + cbd_header_put(buf, &header); + ret = pblk_write_wait(&c->kparams, 0, 1, page); if (ret) { printk(KERN_ERR "%s: failed to write header\n", __func__); } - cbd_free_page(pblkpage); + cbd_free_page(page); return ret; } diff --git a/dm-compress/lbatpblk.c b/dm-compress/lbatpblk.c index 4868e7d..0122212 100644 --- a/dm-compress/lbatpblk.c +++ b/dm-compress/lbatpblk.c @@ -69,6 +69,9 @@ lbatpblk_ctr(struct lbatpblk* lp, static void lbatpblk_dtr(struct lbatpblk* lp) { + if (lp->page) { + lock_page(lp->page); + } lp->buf = NULL; cbd_free_page(lp->page); lp->page = NULL; @@ -84,7 +87,6 @@ static int lbatpblk_flush(struct lbatpblk* lp) { int ret = 0; - struct page* iopagev[1]; mutex_lock(&lp->lock); if (!PageDirty(lp->page)) { @@ -94,8 +96,7 @@ lbatpblk_flush(struct lbatpblk* lp) ret = -EIO; goto unlock; } - iopagev[0] = lp->page; - pblk_write(lp->kparams, lp->pblk, 1, iopagev); + pblk_write(lp->kparams, lp->pblk, 1, lp->page); mutex_unlock(&lp->lock); mutex_lock(&lp->kstats->lock); ++lp->kstats->lbatpblk_w; @@ -114,10 +115,9 @@ static int lbatpblk_read(struct lbatpblk* lp) { int ret = 0; - struct page* pagev[1]; - pagev[0] = lp->page; - ret = pblk_read_wait(lp->kparams, lp->pblk, 1, pagev); + BUG_ON(lbatpblk_error(lp)); + ret = pblk_read_wait(lp->kparams, lp->pblk, 1, lp->page); mutex_lock(&lp->kstats->lock); ++lp->kstats->lbatpblk_r; mutex_unlock(&lp->kstats->lock); diff --git a/dm-compress/lbatview.c b/dm-compress/lbatview.c index 30e6eb3..fc64cf7 100644 --- a/dm-compress/lbatview.c +++ b/dm-compress/lbatview.c @@ -272,13 +272,11 @@ lbatview_elem_off(struct lbatview* lv, u64 lblk) static void lbatview_rmem(struct lbatview* lv, u32 off, u32 len, void* buf) { - /* XXX: Convert below to a BUG_ON */ - if (off + len > 2 * PAGE_SIZE) { - printk(KERN_ERR "%s: *** out of bounds\n", __func__); - return; - } - if (off < PAGE_SIZE && off + len > PAGE_SIZE) { - u32 len0 = PAGE_SIZE - off; + u32 pblk_len = pblk_size(&lv->kparams->params); + + BUG_ON(off + len > 2 * pblk_len); + if (off < pblk_len && off + len > pblk_len) { + u32 len0 = pblk_len - off; u8* pagebuf0 = lbatpblk_get_buf(lv->pages[0], false); u8* pagebuf1 = lbatpblk_get_buf(lv->pages[1], false); memcpy(buf, pagebuf0 + off, len0); @@ -287,8 +285,8 @@ lbatview_rmem(struct lbatview* lv, u32 off, u32 len, void* buf) lbatpblk_put_buf(lv->pages[0]); } else { - u32 bufidx = off / PAGE_SIZE; - u32 bufoff = off % PAGE_SIZE; + u32 bufidx = off / pblk_len; + u32 bufoff = off % pblk_len; u8* pagebuf = lbatpblk_get_buf(lv->pages[bufidx], false); memcpy(buf, pagebuf + bufoff, len); lbatpblk_put_buf(lv->pages[bufidx]); @@ -298,13 +296,11 @@ lbatview_rmem(struct lbatview* lv, u32 off, u32 len, void* buf) static void lbatview_wmem(struct lbatview* lv, u32 off, u32 len, void* buf) { - /* XXX: Convert below to a BUG_ON */ - if (off + len > 2 * PAGE_SIZE) { - printk(KERN_ERR "%s: *** out of bounds\n", __func__); - return; - } - if (off < PAGE_SIZE && off + len > PAGE_SIZE) { - u32 len0 = PAGE_SIZE - off; + u32 pblk_len = pblk_size(&lv->kparams->params); + + BUG_ON(off + len > 2 * pblk_len); + if (off < pblk_len && off + len > pblk_len) { + u32 len0 = pblk_len - off; u8* pagebuf0 = lbatpblk_get_buf(lv->pages[0], true); u8* pagebuf1 = lbatpblk_get_buf(lv->pages[1], true); memcpy(pagebuf0 + off, buf, len0); @@ -313,8 +309,8 @@ lbatview_wmem(struct lbatview* lv, u32 off, u32 len, void* buf) lbatpblk_put_buf(lv->pages[0]); } else { - u32 bufidx = off / PAGE_SIZE; - u32 bufoff = off % PAGE_SIZE; + u32 bufidx = off / pblk_len; + u32 bufoff = off % pblk_len; u8* pagebuf = lbatpblk_get_buf(lv->pages[bufidx], true); memcpy(pagebuf + bufoff, buf, len); lbatpblk_put_buf(lv->pages[bufidx]); diff --git a/dm-compress/lbd.c b/dm-compress/lbd.c index 8222d0f..d268a49 100644 --- a/dm-compress/lbd.c +++ b/dm-compress/lbd.c @@ -259,7 +259,7 @@ lbd_ctr(struct lbd* lbd, struct lbatviewcache* lvc, void* percpu) { - u32 nr_pages = lblk_per_pblk(&kparams->params); + u32 nr_pages = DIV_ROUND_UP(lblk_size(&kparams->params), PAGE_SIZE); memset(lbd, 0, sizeof(struct lbd)); INIT_LIST_HEAD(&lbd->lru_list); @@ -281,9 +281,14 @@ lbd_ctr(struct lbd* lbd, if (!cbd_alloc_pagev(lbd->pagev, nr_pages)) { return false; } - lbd->buf = vmap(lbd->pagev, nr_pages, VM_MAP, PAGE_KERNEL); - if (!lbd->buf) { - return false; + if (nr_pages == 1) { + lbd->buf = page_address(lbd->pagev[0]); + } + else { + lbd->buf = vmap(lbd->pagev, nr_pages, VM_MAP, PAGE_KERNEL); + if (!lbd->buf) { + return false; + } } lbd->c_len = 0; @@ -293,17 +298,30 @@ lbd_ctr(struct lbd* lbd, static void lbd_dtr(struct lbd* lbd) { - u32 nr_pages = lblk_per_pblk(&lbd->kparams->params); + u32 nr_pages = DIV_ROUND_UP(lblk_size(&lbd->kparams->params), PAGE_SIZE); + u32 n; + + if (lbd->pagev) { + for (n = 0; n < nr_pages; ++n) { + if (lbd->pagev[n]) { + lock_page(lbd->pagev[n]); + } + } + } if (lbatviewcache_put(lbd->lvc, lbd->lv) != 0) { printk(KERN_ERR "%s: lbatviewcache_put failed\n", __func__); } lbd->c_len = 0; - vunmap(lbd->buf); + if (nr_pages != 1) { + vunmap(lbd->buf); + } lbd->buf = NULL; - cbd_free_pagev(lbd->pagev, nr_pages); - kfree(lbd->pagev); - lbd->pagev = NULL; + if (lbd->pagev) { + cbd_free_pagev(lbd->pagev, nr_pages); + kfree(lbd->pagev); + lbd->pagev = NULL; + } lbd->percpu = NULL; lbd->lv = NULL; lbd->lvc = NULL; @@ -319,12 +337,15 @@ static int lbd_flush(struct lbd* lbd) { int ret = 0; + u32 nr_pages = DIV_ROUND_UP(lblk_size(&lbd->kparams->params), PAGE_SIZE); + u32 pblk_per_page = PAGE_SIZE / pblk_size(&lbd->kparams->params); int err; + u32 nr_pblk; + u32 pblk_idx; + u32 pg_idx; + u64 pblkv[PBLK_IOV_MAX]; + u32 iov_len; u32 n; - u64 pblk; - u32 nr_pages = lblk_per_pblk(&lbd->kparams->params); - u32 count; - struct page* iopagev[1]; mutex_lock(&lbd->lock); if (!PageDirty(lbd->pagev[0])) { @@ -346,30 +367,30 @@ lbd_flush(struct lbd* lbd) if (c_blkrem) { memset(lbd->buf + lbd->c_len, 0, c_blkrem); } - count = DIV_ROUND_UP(lbd->c_len, pblk_size(&lbd->kparams->params)); + nr_pblk = DIV_ROUND_UP(lbd->c_len, pblk_size(&lbd->kparams->params)); } else { lbd->c_len = CBD_UNCOMPRESSED; - count = lblk_per_pblk(&lbd->kparams->params); + nr_pblk = lblk_per_pblk(&lbd->kparams->params); } ret = lbatview_elem_realloc(lbd->lv, lbd->lblk, lbd->c_len); if (ret) { lbd->kparams->params.flags |= CBD_FLAG_ERROR; goto unlock; } - for (n = 0; n < count; ++n) { - pblk = lbatview_elem_pblk(lbd->lv, lbd->lblk, n); - BUG_ON(pblk == PBLK_NONE); - iopagev[0] = lbd->pagev[n]; - pblk_write(lbd->kparams, pblk, 1, iopagev); + for (pblk_idx = 0, pg_idx = 0; pblk_idx < nr_pblk; ++pg_idx) { + iov_len = min(nr_pblk - pblk_idx, pblk_per_page); + for (n = 0; n < iov_len; ++n) { + pblkv[n] = lbatview_elem_pblk(lbd->lv, lbd->lblk, pblk_idx++); + } + pblk_writev(lbd->kparams, pblkv, iov_len, lbd->pagev[pg_idx]); + } + while (pg_idx < nr_pages) { + unlock_page(lbd->pagev[pg_idx++]); } mutex_lock(&lbd->kstats->lock); ++lbd->kstats->lbd_w; mutex_unlock(&lbd->kstats->lock); - while (n < lblk_per_pblk(&lbd->kparams->params)) { - unlock_page(lbd->pagev[n]); - ++n; - } goto out; unlock: @@ -392,39 +413,36 @@ static int lbd_read(struct lbd* lbd) { int ret = 0; - u32 count; + u32 pblk_per_page = PAGE_SIZE / pblk_size(&lbd->kparams->params); + u32 nr_pblk; + u32 pblk_idx; + u32 pg_idx; + u64 pblkv[PBLK_IOV_MAX]; + u32 iov_len; u32 n; - u64 pblk; - struct page* iopagev[1]; - /* XXX: can't happen because lbdcache will not use a page with an error */ - if (PageError(lbd->pagev[0])) { - return -EIO; - } + BUG_ON(lbd_error(lbd)); lbd->c_len = lbatview_elem_len(lbd->lv, lbd->lblk); if (lbd->c_len == 0) { memset(lbd->buf, 0, lblk_size(&lbd->kparams->params)); } else { - count = (lbd->c_len == CBD_UNCOMPRESSED) ? + nr_pblk = (lbd->c_len == CBD_UNCOMPRESSED) ? lblk_per_pblk(&lbd->kparams->params) : DIV_ROUND_UP(lbd->c_len, pblk_size(&lbd->kparams->params)); - for (n = 0; n < count; ++n) { - pblk = lbatview_elem_pblk(lbd->lv, lbd->lblk, n); - if (pblk == PBLK_NONE) { - ret = -EIO; - goto out; + for (pblk_idx = 0, pg_idx = 0; pblk_idx < nr_pblk; ++pg_idx) { + iov_len = min(nr_pblk - pblk_idx, pblk_per_page); + for (n = 0; n < iov_len; ++n) { + pblkv[n] = lbatview_elem_pblk(lbd->lv, lbd->lblk, pblk_idx++); } - iopagev[0] = lbd->pagev[n]; - /* XXX: Issue non-blocking reads? */ - ret = pblk_read_wait(lbd->kparams, pblk, 1, iopagev); + ret = pblk_readv_wait(lbd->kparams, pblkv, iov_len, lbd->pagev[pg_idx]); if (ret) { goto out; } } if (lbd->c_len != CBD_UNCOMPRESSED) { if (!lblk_decompress(lbd)) { - printk(KERN_ERR " decompress failed\n"); + printk(KERN_ERR "%s: decompress failed\n", __func__); ret = -EIO; goto out; } @@ -443,7 +461,7 @@ static int lbd_reset(struct lbd* lbd, u64 lblk) { int ret = 0; - u32 nr_pages = lblk_per_pblk(&lbd->kparams->params); + u32 nr_pages = DIV_ROUND_UP(lblk_size(&lbd->kparams->params), PAGE_SIZE); u32 n; if (lbd->lv) { printk(KERN_ERR "%s: lbatview leak\n", __func__); } @@ -496,11 +514,7 @@ lbd_lblk(struct lbd* lbd) void lbd_data_read(struct lbd* lbd, u32 off, u32 len, u8* buf) { - /* XXX: convert to BUG_ON */ - if (off + len > lblk_size(&lbd->kparams->params)) { - printk(KERN_ERR "%s: out of bounds\n", __func__); - return; - } + BUG_ON(off + len > lblk_size(&lbd->kparams->params)); mutex_lock(&lbd->lock); memcpy(buf, lbd->buf + off, len); mutex_unlock(&lbd->lock); @@ -509,11 +523,7 @@ lbd_data_read(struct lbd* lbd, u32 off, u32 len, u8* buf) void lbd_data_write(struct lbd* lbd, u32 off, u32 len, const u8* buf) { - /* XXX: convert to BUG_ON */ - if (off + len > lblk_size(&lbd->kparams->params)) { - printk(KERN_ERR "%s: out of bounds\n", __func__); - return; - } + BUG_ON(off + len > lblk_size(&lbd->kparams->params)); mutex_lock(&lbd->lock); memcpy(lbd->buf + off, buf, len); SetPageDirty(lbd->pagev[0]); @@ -564,7 +574,7 @@ lbdcache_alloc_compress_state(void* percpu, const struct cbd_params* params, int return false; } #ifdef COMPRESS_HAVE_LZ4 - workmem_len = LZ4_compressBound(lblk_size(params)); + workmem_len = LZ4_MEM_COMPRESS; state->lz4_workmem = vzalloc(workmem_len); if (!state->lz4_workmem) { return false; diff --git a/dm-compress/pbat.c b/dm-compress/pbat.c index e68e3e8..6349f70 100644 --- a/dm-compress/pbat.c +++ b/dm-compress/pbat.c @@ -40,7 +40,7 @@ struct pbat { struct compress_stats* kstats; bool full; u32 last_alloc; - struct page** pagev; + struct page* page; u8* buf; }; @@ -49,8 +49,6 @@ pbat_ctr(struct pbat* pbat, struct compress_params* kparams, struct compress_stats* kstats) { - u32 nr_pages = pbat_len(&kparams->params); - memset(pbat, 0, sizeof(struct pbat)); INIT_LIST_HEAD(&pbat->list); pbat->zone = ZONE_NONE; @@ -61,22 +59,11 @@ pbat_ctr(struct pbat* pbat, pbat->kstats = kstats; pbat->full = false; pbat->last_alloc = 0; - pbat->pagev = kzalloc(nr_pages * sizeof(struct page*), GFP_KERNEL); - if (!pbat->pagev) { + pbat->page = cbd_alloc_page(); + if (!pbat->page) { return false; } - if (!cbd_alloc_pagev(pbat->pagev, nr_pages)) { - return false; - } - if (nr_pages == 1) { - pbat->buf = page_address(pbat->pagev[0]); - } - else { - pbat->buf = vmap(pbat->pagev, nr_pages, VM_MAP, PAGE_KERNEL); - if (!pbat->buf) { - return false; - } - } + pbat->buf = page_address(pbat->page); return true; } @@ -84,37 +71,28 @@ pbat_ctr(struct pbat* pbat, static void pbat_dtr(struct pbat* pbat) { - u32 nr_pages = pbat_len(&pbat->kparams->params); - u32 n; - - for (n = 0; n < nr_pages; ++n) { - lock_page(pbat->pagev[n]); - } - if (nr_pages != 1) { - vunmap(pbat->buf); + if (pbat->page) { + lock_page(pbat->page); } pbat->buf = NULL; - cbd_free_pagev(pbat->pagev, nr_pages); - kfree(pbat->pagev); - pbat->pagev = NULL; + cbd_free_page(pbat->page); + pbat->page = NULL; } static bool pbat_error(struct pbat* pbat) { - return PageError(pbat->pagev[0]); + return PageError(pbat->page); } static int pbat_flush(struct pbat* pbat) { int ret = 0; - u32 nr_pages = pbat_len(&pbat->kparams->params); - u32 n; u64 pblk; mutex_lock(&pbat->lock); - if (!PageDirty(pbat->pagev[0])) { + if (!PageDirty(pbat->page)) { goto unlock; } if (pbat_error(pbat)) { @@ -122,7 +100,7 @@ pbat_flush(struct pbat* pbat) goto unlock; } pblk = pbat_off(&pbat->kparams->params, pbat->zone); - pblk_write(pbat->kparams, pblk, nr_pages, pbat->pagev); + pblk_write(pbat->kparams, pblk, pbat_len(&pbat->kparams->params), pbat->page); mutex_unlock(&pbat->lock); mutex_lock(&pbat->kstats->lock); ++pbat->kstats->pbat_w; @@ -131,9 +109,7 @@ pbat_flush(struct pbat* pbat) return ret; unlock: - for (n = 0; n < nr_pages; ++n) { - unlock_page(pbat->pagev[n]); - } + unlock_page(pbat->page); mutex_unlock(&pbat->lock); return ret; @@ -143,15 +119,11 @@ static int pbat_read(struct pbat* pbat) { int ret = 0; - u32 nr_pages = pbat_len(&pbat->kparams->params); u64 pblk; - /* XXX: can't happen because pbatcache will not use a page with an error */ - if (PageError(pbat->pagev[0])) { - return -EIO; - } + BUG_ON(pbat_error(pbat)); pblk = pbat_off(&pbat->kparams->params, pbat->zone); - ret = pblk_read_wait(pbat->kparams, pblk, nr_pages, pbat->pagev); + ret = pblk_read_wait(pbat->kparams, pblk, pbat_len(&pbat->kparams->params), pbat->page); mutex_lock(&pbat->kstats->lock); ++pbat->kstats->pbat_r; mutex_unlock(&pbat->kstats->lock); @@ -163,12 +135,8 @@ static int pbat_reset(struct pbat* pbat, u32 zone) { int ret = 0; - u32 nr_pages = pbat_len(&pbat->kparams->params); - u32 n; - for (n = 0; n < nr_pages; ++n) { - lock_page(pbat->pagev[n]); - } + lock_page(pbat->page); if (pbat->zone != zone) { pbat->zone = zone; pbat->full = false; @@ -177,9 +145,7 @@ pbat_reset(struct pbat* pbat, u32 zone) } if (ret) { - for (n = 0; n < nr_pages; ++n) { - unlock_page(pbat->pagev[n]); - } + unlock_page(pbat->page); pbat->zone = ZONE_NONE; } @@ -213,7 +179,7 @@ pbat_alloc(struct pbat* pbat) } pbat->last_alloc = idx; pblk = idx + zone_data_off(&pbat->kparams->params, pbat->zone); - SetPageDirty(pbat->pagev[0]); + SetPageDirty(pbat->page); out: mutex_unlock(&pbat->lock); @@ -239,7 +205,7 @@ pbat_free(struct pbat* pbat, u64 pblk) mutex_lock(&pbat->lock); cbd_bitmap_free(pbat->buf, idx); pbat->full = false; - SetPageDirty(pbat->pagev[0]); + SetPageDirty(pbat->page); mutex_unlock(&pbat->lock); return 0; diff --git a/dm-compress/util.c b/dm-compress/util.c index 10a0faf..160bfee 100644 --- a/dm-compress/util.c +++ b/dm-compress/util.c @@ -99,12 +99,12 @@ cbd_free_pagev(struct page** pagev, size_t len) static struct bio* pblk_io_prepare(struct block_device* bdev, unsigned int op, - u32 pblk_size, u64 pblk, u32 count, struct page** pagev) + u32 pblk_len, u64 pblk, u32 count, struct page* page, u32 page_off) { struct bio* bio; - u32 n; - bio = bio_alloc(GFP_KERNEL, count); + BUG_ON(page_off + pblk_len * count > PAGE_SIZE); + bio = bio_alloc(GFP_KERNEL, 1); if (!bio) { printk(KERN_ERR "%s: out of memory\n", __func__); return NULL; @@ -112,11 +112,9 @@ pblk_io_prepare(struct block_device* bdev, unsigned int op, bio_set_dev(bio, bdev); bio->bi_opf = op; - bio->bi_iter.bi_sector = pblk * (pblk_size / SECTOR_SIZE); - for (n = 0; n < count; ++n) { - if (bio_add_page(bio, pagev[n], pblk_size, 0) != pblk_size) { - BUG(); - } + bio->bi_iter.bi_sector = pblk * (pblk_len / SECTOR_SIZE); + if (bio_add_page(bio, page, pblk_len * count, page_off) == 0) { + BUG(); } return bio; @@ -124,13 +122,13 @@ pblk_io_prepare(struct block_device* bdev, unsigned int op, int pblk_read_wait(struct compress_params* kparams, - u64 pblk, u32 count, struct page** pagev) + u64 pblk, u32 count, struct page* page) { int ret; struct bio* bio; bio = pblk_io_prepare(kparams->dev, REQ_OP_READ, - pblk_size(&kparams->params), pblk, count, pagev); + pblk_size(&kparams->params), pblk, count, page, 0); if (!bio) { printk(KERN_ERR "%s: out of memory\n", __func__); return -ENOMEM; @@ -144,15 +142,44 @@ pblk_read_wait(struct compress_params* kparams, return ret; } +int +pblk_readv_wait(struct compress_params* kparams, + u64* pblkv, u32 count, struct page* page) +{ + int ret = 0; + u32 pblk_len = pblk_size(&kparams->params); + u32 n; + u32 page_off; + struct bio* bio; + + /* XXX: Issue no-blocking reads for parallelism? */ + for (n = 0, page_off = 0; n < count; ++n, page_off += pblk_len) { + bio = pblk_io_prepare(kparams->dev, REQ_OP_READ, + pblk_len, pblkv[n], 1, page, page_off); + if (!bio) { + printk(KERN_ERR "%s: out of memory\n", __func__); + return -ENOMEM; + } + ret = submit_bio_wait(bio); + if (ret) { + printk(KERN_ERR "%s: submit_bio_wait failed: %d\n", __func__, ret); + return ret; + } + bio_put(bio); + } + + return ret; +} + int pblk_write_wait(struct compress_params* kparams, - u64 pblk, u32 count, struct page** pagev) + u64 pblk, u32 count, struct page* page) { int ret; struct bio* bio; bio = pblk_io_prepare(kparams->dev, REQ_OP_WRITE, - pblk_size(&kparams->params), pblk, count, pagev); + pblk_size(&kparams->params), pblk, count, page, 0); if (!bio) { printk(KERN_ERR "%s: out of memory\n", __func__); return -ENOMEM; @@ -171,45 +198,120 @@ void pblk_write_endio(struct bio* bio) { struct compress_params* kparams = bio->bi_private; - u32 n; - struct page* page; + struct page* page = bio->bi_io_vec[0].bv_page; - BUG_ON(!bio); if (bio->bi_status != BLK_STS_OK) { + printk(KERN_ERR "%s: I/O error\n", __func__); kparams->params.flags |= CBD_FLAG_ERROR; - for (n = 0; n < bio->bi_max_vecs; ++n) { - page = bio->bi_io_vec[n].bv_page; - SetPageError(page); - } - } - for (n = 0; n < bio->bi_max_vecs; ++n) { - page = bio->bi_io_vec[n].bv_page; - ClearPageDirty(page); - unlock_page(page); + SetPageError(page); } + ClearPageDirty(page); + unlock_page(page); bio_put(bio); } void pblk_write(struct compress_params* kparams, - u64 pblk, u32 count, struct page** pagev) + u64 pblk, u32 count, struct page* page) { struct bio* bio; - u32 n; bio = pblk_io_prepare(kparams->dev, REQ_OP_WRITE, - pblk_size(&kparams->params), pblk, count, pagev); + pblk_size(&kparams->params), pblk, count, page, 0); if (!bio) { printk(KERN_ERR "%s: out of memory\n", __func__); kparams->params.flags |= CBD_FLAG_ERROR; - for (n = 0; n < count; ++n) { - SetPageError(pagev[n]); - unlock_page(pagev[n]); - } + SetPageError(page); + unlock_page(page); return; } bio->bi_end_io = pblk_write_endio; bio->bi_private = kparams; - submit_bio(bio); } + +struct pblk_iov +{ + struct compress_params* kparams; + atomic_t remain; +}; + +void +pblk_writev_endio(struct bio* bio) +{ + struct pblk_iov* iov = bio->bi_private; + struct compress_params* kparams = iov->kparams; + struct page* page = bio->bi_io_vec[0].bv_page; + + if (bio->bi_status != BLK_STS_OK) { + printk(KERN_ERR "%s: I/O error\n", __func__); + kparams->params.flags |= CBD_FLAG_ERROR; + SetPageError(page); + } + if (atomic_dec_and_test(&iov->remain)) { + ClearPageDirty(page); + unlock_page(page); + kfree(iov); + } + bio_put(bio); +} + +void +pblk_writev(struct compress_params* kparams, + u64* pblkv, u32 count, struct page* page) +{ + u32 pblk_len = pblk_size(&kparams->params); + struct pblk_iov* iov; + u32 idx; + u32 page_off; + u32 nr_bio; + u64 pblk; + u32 iov_nr_pblk; + struct bio* bio; + + BUG_ON(pblk_len * count > PAGE_SIZE); + iov = kmalloc(sizeof(struct pblk_iov), GFP_KERNEL); + if (!iov) { + printk(KERN_ERR "%s: out of memory\n", __func__); + goto err; + } + iov->kparams = kparams; + atomic_set(&iov->remain, count); + idx = 0; + page_off = 0; + nr_bio = 0; + while (idx < count) { + pblk = pblkv[idx]; + iov_nr_pblk = 1; + ++idx; + while (idx < count && pblkv[idx] == pblk + iov_nr_pblk) { + ++iov_nr_pblk; + ++idx; + } + bio = pblk_io_prepare(kparams->dev, REQ_OP_WRITE, + pblk_len, pblk, iov_nr_pblk, page, page_off); + if (!bio) { + printk(KERN_ERR "%s: out of memory\n", __func__); + goto err_free; + } + ++nr_bio; + bio->bi_end_io = pblk_writev_endio; + bio->bi_private = iov; + submit_bio(bio); + page_off += pblk_len * iov_nr_pblk; + } + if (atomic_sub_and_test(count - nr_bio, &iov->remain)) { + ClearPageDirty(page); + unlock_page(page); + kfree(iov); + } + + return; + +err_free: + kfree(iov); +err: + kparams->params.flags |= CBD_FLAG_ERROR; + SetPageError(page); + unlock_page(page); +} diff --git a/include/cbdutil.h b/include/cbdutil.h index 2af81f1..a3d4e53 100644 --- a/include/cbdutil.h +++ b/include/cbdutil.h @@ -41,6 +41,6 @@ void __attribute__((noreturn)) error(const char* fmt, ...); int verbose(uint level, const char* fmt, ...); -bool ask_user_bool(const char* fmt, ...); +bool ask_user_bool(tristate_t auto_response, const char* fmt, ...); #endif diff --git a/include/libcbd.h b/include/libcbd.h index 2b8c596..83f7f1c 100644 --- a/include/libcbd.h +++ b/include/libcbd.h @@ -66,9 +66,9 @@ typedef enum { } tristate_t; int cbd_format(const char* dev, - uint8_t pshift, uint8_t lshift, - uint16_t pbatlen, enum cbd_alg alg, uint level, + uint8_t pshift, uint8_t lshift, + uint8_t pbatshift, uint64_t psize, uint64_t lsize); int cbd_open(const char* dev, const char* name); diff --git a/include/linux/dm-compress.h b/include/linux/dm-compress.h index 4b35dbf..b1547d5 100644 --- a/include/linux/dm-compress.h +++ b/include/linux/dm-compress.h @@ -5,6 +5,8 @@ #define PBLK_SHIFT_MAX 3 #define LBLK_SHIFT_MIN 1 #define LBLK_SHIFT_MAX 10 +#define PBAT_SHIFT_MIN 0 +#define PBAT_SHIFT_MAX 3 #define ZONE_NONE (u32)(~0) #define PBLK_NONE (u64)(~0) @@ -35,7 +37,8 @@ struct cbd_params { u8 pblk_shift; u8 lblk_shift; u8 lba_elem_pblk_bytes; - u16 pbat_len; + u8 pbat_shift; + /* u8 pad */ u32 nr_zones; u32 lblk_per_zone; }; @@ -253,7 +256,7 @@ lblk_size(const struct cbd_params* params) static inline u32 pbat_len(const struct cbd_params* params) { - return params->pbat_len; + return (1 << params->pbat_shift); } static inline u32 @@ -356,7 +359,8 @@ cbd_header_get(const u8* buf, struct cbd_header* header) header->params.pblk_shift = get_byte(&p); header->params.lblk_shift = get_byte(&p); header->params.lba_elem_pblk_bytes = get_byte(&p); - header->params.pbat_len = get16_le(&p); + header->params.pbat_shift = get_byte(&p); + p += 1; /* pad */ header->params.nr_zones = get32_le(&p); header->params.lblk_per_zone = get32_le(&p); p = buf + 64; @@ -377,7 +381,8 @@ cbd_header_put(u8* buf, const struct cbd_header* header) put_byte(&p, header->params.pblk_shift); put_byte(&p, header->params.lblk_shift); put_byte(&p, header->params.lba_elem_pblk_bytes); - put16_le(&p, header->params.pbat_len); + put_byte(&p, header->params.pbat_shift); + put_byte(&p, 0); /* pad */ put32_le(&p, header->params.nr_zones); put32_le(&p, header->params.lblk_per_zone); p = buf + 64; @@ -577,12 +582,17 @@ bool cbd_alloc_pagev(struct page** pagev, size_t len); void cbd_free_pagev(struct page** pagev, size_t len); /* Core low-level I/O */ +#define PBLK_IOV_MAX (PAGE_SIZE / SECTOR_SIZE) int pblk_read_wait(struct compress_params* kparams, - u64 pblk, u32 count, struct page** pagev); + u64 pblk, u32 count, struct page* page); +int pblk_readv_wait(struct compress_params* kparams, + u64* pblkv, u32 count, struct page* page); int pblk_write_wait(struct compress_params* kparams, - u64 pblk, u32 count, struct page** pagev); + u64 pblk, u32 count, struct page* page); void pblk_write(struct compress_params* kparams, - u64 pblk, u32 count, struct page** pagev); + u64 pblk, u32 count, struct page* page); +void pblk_writev(struct compress_params* kparams, + u64* pblkv, u32 count, struct page* page); struct pbat; u32 pbat_zone(struct pbat* pbat); diff --git a/libcbd/check.c b/libcbd/check.c index 0a607db..9e4d6a1 100644 --- a/libcbd/check.c +++ b/libcbd/check.c @@ -10,6 +10,7 @@ typedef off_t off64_t; struct check_state { int fd; + tristate_t auto_response; bool check_lblk_data; bool clean; u64 pblk_used; @@ -17,7 +18,6 @@ struct check_state u8** pbatv; u8* compress_buf; - u8* lz4_workmem; z_stream zlib_dstream; }; @@ -206,7 +206,8 @@ check_lblk_data(struct check_state* state, ret = check_decompress(state, params, data, len); free(data); if (!ret) { - if (ask_user_bool("lblk %u: failed to decompress. Clear?", lblk)) { + if (ask_user_bool(state->auto_response, + "lblk %u: failed to decompress. Clear?", lblk)) { memset(lba, 0, lba_len(params)); return true; } @@ -241,7 +242,8 @@ check_lblk_alloc(struct check_state* state, verbose(2, " lblk[%u]: len=%u\n", lblk, len); } if (len > lblk_size(params)) { - if (ask_user_bool("lblk %u: length %u out of bounds. Clear?", lblk, len)) { + if (ask_user_bool(state->auto_response, + "lblk %u: length %u out of bounds. Clear?", lblk, len)) { memset(lba, 0, lba_len(params)); return true; } @@ -255,7 +257,8 @@ check_lblk_alloc(struct check_state* state, pblk = lba_pblk_get(params, buf, n); if (pblk < CBD_HEADER_BLOCKS) { verbose(2, " [%u] :E: Alloc in header: %lu\n", n, pblk); - if (ask_user_bool("lblk %u: alloc %u in header. Clear?", lblk, n)) { + if (ask_user_bool(state->auto_response, + "lblk %u: alloc %u in header. Clear?", lblk, n)) { memset(lba, 0, lba_len(params)); return true; } @@ -265,7 +268,8 @@ check_lblk_alloc(struct check_state* state, pblk_zone = zone_for_pblk(params, pblk); if (pblk_zone == ZONE_NONE || pblk_zone >= params->nr_zones) { verbose(2, " [%u] :E: Alloc beyond end: %lu\n", n, pblk); - if (ask_user_bool("lblk %u: alloc %u beyond end. Clear?", lblk, n)) { + if (ask_user_bool(state->auto_response, + "lblk %u: alloc %u beyond end. Clear?", lblk, n)) { memset(lba, 0, lba_len(params)); return true; } @@ -274,7 +278,8 @@ check_lblk_alloc(struct check_state* state, } if (pblk < zone_data_off(params, pblk_zone)) { verbose(2, " [%u] :E: Alloc in metadata: %lu\n", n, pblk); - if (ask_user_bool("lblk %u alloc in medatada. Clear?", lblk)) { + if (ask_user_bool(state->auto_response, + "lblk %u alloc in medatada. Clear?", lblk)) { memset(lba, 0, lba_len(params)); return true; } @@ -285,7 +290,8 @@ check_lblk_alloc(struct check_state* state, verbose(3, " [%u] pblk=%lu\n", n, (unsigned long)pblk); if (cbd_bitmap_isset(state->pbatv[pblk_zone], pblk_off)) { verbose(2, " [%u] :E: Duplicate allocation for pblk %lu\n", n, (unsigned long)pblk); - if (ask_user_bool("lblk %u duplicate alloc for pblk %lu. Clear?", lblk, pblk)) { + if (ask_user_bool(state->auto_response, + "lblk %u duplicate alloc for pblk %lu. Clear?", lblk, pblk)) { memset(lba, 0, lba_len(params)); return true; } @@ -294,6 +300,8 @@ check_lblk_alloc(struct check_state* state, } cbd_bitmap_set(state->pbatv[pblk_zone], pblk_off); } + ++state->lblk_used; + state->pblk_used += n_alloc; return false; } @@ -316,7 +324,7 @@ check_lbat(struct check_state* state, const struct cbd_params* params) (unsigned long)zone_off(params, zone), (unsigned long)(zone_data_off(params, zone) - 1), (unsigned long)zone_data_off(params, zone), - (unsigned long)zone_off(params, zone + 1)); + (unsigned long)(zone_off(params, zone + 1) - 1)); for (n = 0; n < params->lblk_per_zone; ++n) { u8* buf = lbat + n * lba_len(params); if (lba_len_get(params, buf) != 0) { @@ -331,7 +339,6 @@ check_lbat(struct check_state* state, const struct cbd_params* params) for (n = 0; n < params->lblk_per_zone; ++n) { u64 lblk = zone * params->lblk_per_zone + n; u8* buf = lbat + n * lba_len(params); - u32 len; if (check_lblk_alloc(state, params, lblk, buf)) { changed = true; } @@ -340,11 +347,6 @@ check_lbat(struct check_state* state, const struct cbd_params* params) changed = true; } } - len = lba_len_get(params, buf); - if (len != 0) { - ++state->lblk_used; - state->pblk_used += DIV_ROUND_UP(len, pblk_size(params)); - } } if (changed) { lbat_write(state->fd, params, zone, lbat); @@ -364,7 +366,8 @@ check_pbat(struct check_state* state, const struct cbd_params* params) bool changed = false; pbat_read(state->fd, params, zone, pbat); if (memcmp(pbat, state->pbatv[zone], pblk_size(params) * pbat_len(params)) != 0) { - if (ask_user_bool("zone %u has incorrect pbat. Fix?", zone)) { + if (ask_user_bool(state->auto_response, + "zone %u has incorrect pbat. Fix?", zone)) { memcpy(pbat, state->pbatv[zone], pblk_size(params) * pbat_len(params)); changed = true; } @@ -395,8 +398,6 @@ cbd_check(const char* dev, if (state.fd < 0) { error("Cannot open device\n"); } - state.check_lblk_data = full_check; - state.clean = true; verbose(1, "Reading header\n"); pblk_read(state.fd, SECTOR_SIZE, 0, 1, buf); @@ -409,16 +410,26 @@ cbd_check(const char* dev, return 0; } + state.auto_response = auto_response; + state.check_lblk_data = full_check; + state.clean = true; state.pbatv = calloc(header.params.nr_zones, sizeof(u8*)); for (n = 0; n < header.params.nr_zones; ++n) { state.pbatv[n] = calloc(pblk_size(&header.params), pbat_len(&header.params)); } + if (full_check) { + state.compress_buf = malloc(lblk_size(&header.params)); + memset(state.compress_buf, 0, lblk_size(&header.params)); + memset(&state.zlib_dstream, 0, sizeof(z_stream)); + inflateInit2(&state.zlib_dstream, MAX_WBITS); + } verbose(1, "Checking lbat\n"); check_lbat(&state, &header.params); verbose(1, "Checking pbat\n"); check_pbat(&state, &header.params); + free(state.compress_buf); for (n = 0; n < header.params.nr_zones; ++n) { free(state.pbatv[n]); } diff --git a/libcbd/format.c b/libcbd/format.c index 112d234..c09556a 100644 --- a/libcbd/format.c +++ b/libcbd/format.c @@ -27,9 +27,9 @@ pblk_write(int fd, u32 pblk_size, u64 pblk, u32 count, const u8* data) int cbd_format(const char* dev, - uint8_t pshift, uint8_t lshift, - uint16_t pbatlen, enum cbd_alg alg, uint level, + uint8_t pshift, uint8_t lshift, + uint8_t pbatshift, uint64_t psize, uint64_t lsize) { int devfd; @@ -45,6 +45,12 @@ cbd_format(const char* dev, error("Cannot open device\n"); } + if (alg <= CBD_ALG_NONE || alg >= CBD_ALG_MAX) { + error("Compression algorithm %d unknown\n", (int)alg); + } + if (level < 1 || level > 9) { + error("Compression level %u out of bounds\n", level); + } if (!pshift) { pshift = CBD_DEFAULT_PHYSICAL_BLOCK_SHIFT; } @@ -61,14 +67,9 @@ cbd_format(const char* dev, (uint)lshift, (uint)LBLK_SHIFT_MIN, (uint)LBLK_SHIFT_MAX); } lblk_size = pblk_size * (1 << lshift); - if (!pbatlen) { - pbatlen = 1; - } - if (alg <= CBD_ALG_NONE || alg >= CBD_ALG_MAX) { - error("Compression algorithm %d unknown\n", (int)alg); - } - if (level < 1 || level > 9) { - error("Compression level %u out of bounds\n", level); + if (pbatshift < PBAT_SHIFT_MIN || pbatshift > PBAT_SHIFT_MAX) { + error("Physical block allocation table shift %u is not in [%u,%u]\n", + (uint)pbatshift, (uint)PBAT_SHIFT_MIN, (uint)PBAT_SHIFT_MAX); } if (!psize) { off_t pos; @@ -92,7 +93,7 @@ cbd_format(const char* dev, printf("%s: parameters...\n", __func__); printf(" pshift=%u\n", (unsigned int)pshift); printf(" lshift=%u\n", (unsigned int)lshift); - printf(" pbatlen=%hu\n", (unsigned short)pbatlen); + printf(" pbatshift=%u\n", (unsigned int)pbatshift); printf(" alg=%d\n", (int)alg); printf(" level=%u\n", level); printf(" psize=%lu\n", (unsigned long)psize); @@ -110,7 +111,7 @@ cbd_format(const char* dev, header.params.lba_elem_pblk_bytes = ((psize / pblk_size) <= 0xffff ? 2 : ((psize / pblk_size) <= 0xffffffff ? 4 : 6)); - header.params.pbat_len = pbatlen; + header.params.pbat_shift = pbatshift; /* XXX: Initial estimate */ header.params.lblk_per_zone = zone_data_len(&header.params) * (lsize / lblk_size) / (psize / pblk_size); printf(" initial estimate for lblk_per_zone: %lu\n", (unsigned long)header.params.lblk_per_zone); @@ -121,7 +122,7 @@ cbd_format(const char* dev, printf(" pblk_shift=%hu\n", (unsigned short)header.params.pblk_shift); printf(" lblk_shift=%hu\n", (unsigned short)header.params.lblk_shift); printf(" lba_elem_pblk_bytes=%hu\n", (unsigned short)header.params.lba_elem_pblk_bytes); - printf(" pbat_len=%hu\n", (unsigned short)header.params.pbat_len); + printf(" pbat_shift=%hu\n", (unsigned short)header.params.pbat_shift); printf(" nr_zones=%lu\n", (unsigned long)header.params.nr_zones); printf(" lblk_per_zone=%lu\n", (unsigned long)header.params.lblk_per_zone); diff --git a/libcbd/util.c b/libcbd/util.c index 7427a4b..71e8167 100644 --- a/libcbd/util.c +++ b/libcbd/util.c @@ -29,18 +29,27 @@ verbose(uint level, const char* fmt, ...) } bool -ask_user_bool(const char* fmt, ...) +ask_user_bool(tristate_t auto_response, const char* fmt, ...) { va_list ap; - char* line = NULL; - size_t len = 0; + char prompt[256]; + char* line; + size_t len; int ret; bool answer; va_start(ap, fmt); + vsnprintf(prompt, sizeof(prompt), fmt, ap); + va_end(ap); again: - vprintf(fmt, ap); - printf(" [y/n]? "); + printf("%s [y/n]? ", prompt); + if (auto_response != t_none) { + answer = (auto_response == t_true); + printf("%s\n", (answer ? "y" : "n")); + return answer; + } + line = NULL; + len = 0; getline(&line, &len, stdin); if (ret > 0) { switch (line[0]) {