checkpoint: Mostly working

This seems to work except for I/O timing.  Reads are sync and
writes are async, so this sequence fails:
  - Object x flushes
  - Object x is reused as y
  - Another object is taken as x
  - New object x reads
  -> Stale data is read

Two potential solutions from here:

1. Implement async reads.

2. Hold ref to object until write completes.

(1) is complicated, but more correct.  The drawback of (2) is that
writes may stay in buffers for quite some time (typically up to 5
seconds), during which the dm-compress object cannot be released.
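
To make the failing sequence concrete, here is a toy single-threaded C
model (an illustration for this note, not code from the tree): the
backing store applies writes late, the way buffered async writes do,
while reads hit it immediately.

#include <stdio.h>
#include <string.h>

static char disk[16] = "old";                     /* one "physical block" */
static struct { char data[16]; int pending; } wq; /* async write buffer */

static void write_async(const char* data)
{
	strcpy(wq.data, data);       /* queued only; not yet on "disk" */
	wq.pending = 1;
}

static void write_completes(void)    /* ...up to ~5 seconds later */
{
	if (wq.pending) {
		strcpy(disk, wq.data);
		wq.pending = 0;
	}
}

int main(void)
{
	write_async("new");          /* object x flushes               */
	                             /* x is reused as y; a fresh      */
	                             /* object is taken as x           */
	printf("%s\n", disk);        /* new x reads -> "old": stale    */
	write_completes();
	printf("%s\n", disk);        /* -> "new"                       */
	return 0;
}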
Tom Marshall 2019-10-21 19:39:27 -07:00
parent 8ff29f0262
commit 495d191d16
12 changed files with 2958 additions and 1410 deletions

Makefile

@@ -84,7 +84,12 @@ LIB_SRCS := \
BIN_SRCS := \
cbd.c
KMOD_SRCS := \
util.c \
pbat.c \
lbatpage.c \
lbatview.c \
lbd.c \
compress.c
# Intermediates
LIB_OBJDIR := $(OUT_OBJ)/$(LIB_NAME)
@@ -151,7 +156,7 @@ $(BIN_OBJS): $(BIN_OBJDIR)/%.o: $(BIN_NAME)/%.c
$(OUT_BIN)/$(BIN_NAME)$(EXE_EXT): $(BIN_OBJS) $(OUT_LIB)/$(LIB_NAME)$(LIB_EXT)
$(link-executable)
$(KMOD_NAME)/$(KMOD_NAME)$(KMOD_EXT): $(addprefix $(KMOD_NAME)/,$(KMOD_SRCS))
make -C $(KDIR) M=$(TOP)/$(KMOD_NAME) modules
$(OUT_KMOD)/$(KMOD_NAME)$(KMOD_EXT): $(KMOD_NAME)/$(KMOD_NAME)$(KMOD_EXT)

TODO

@@ -1,3 +1,45 @@
In lbd, atomic writes will require a pblk array in the lbd object. Not sure
how to roll back partial allocations yet but it should be doable.
For async reads:
- lbd_read() is called by compress_read() and compress_write().
lbd may have multiple simultaneous callers.
lbd calls lbatview_read() and reads its own data.
- lbatview_read() is called by lbd.
lbatview may have multiple simultaneous callers.
lbatview calls pbat_read() and reads its own data.
- pbat_read() is called by lbatview_alloc_pblk() and lbatview_free_pblk().
pbat may have multiple simultaneous callers.
pbat reads its own data.
Rework cache ownership:
- compress_open() should alloc only lbdcache.
- lbdcache should alloc only lbatviewcache.
- lbatviewcache should alloc lbatpagecache and pbatcache.
Cache object sizing:
- lbdcache size: multiple of num_online_cpus().
- lbatviewcache:
Min: one.
Max: one per lbd.
Avg: 1/2 lbdcache size.
=> Alloc 1/2 lbdcache size.
- lbatpagecache:
Min: lbatviewcache size.
Max: 2 * lbatviewcache size.
Avg: 1.5 * lbatviewcache size.
=> alloc 1.5 * lbatviewcache size.
- pbatcache size:
Min: 1
Max: lbatviewcache size.
Avg: 1/2 lbatviewcache size.
=> alloc ???
1/2 lbatviewcache size is way too large.
Ratio of lbatview to pbat is 1:lbat_per_pbat.
Cache objects should dynamically expand.
TODO:
- Move back to module based build system.
- Make compression algorithm and speed/level selectable.
@@ -8,6 +50,4 @@ TODO:
- Compressed device must be large enough.
- Backing device must be large enough.
- Remove workqueue.
- (?) Write / flush once per second. What about sync?
- (?) Don't cache lblk_alloc.
- (?) Function ptrs for reading and writing lblk_alloc.
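
The cache-ownership rework listed above is what the code in this commit
implements: compress_open() allocates only the lbdcache, the lbdcache
constructs the lbatviewcache, and the lbatviewcache constructs the
pbatcache and lbatpagecache. A condensed view of the constructor chain
(simplified from lbd.c and lbatview.c below; memset, locking, and error
messages omitted):

bool lbdcache_ctr(struct lbdcache* lc, struct cbd_params* params)
{
	lc->lvc = kzalloc(lbatviewcache_size(), GFP_KERNEL);
	return lc->lvc && lbatviewcache_ctr(lc->lvc, params) &&
	       lbdcache_realloc(lc, 1);
}

bool lbatviewcache_ctr(struct lbatviewcache* lvc, struct cbd_params* params)
{
	lvc->pc = kmalloc(pbatcache_size(), GFP_KERNEL);
	lvc->lpc = kmalloc(lbatpagecache_size(), GFP_KERNEL);
	return lvc->pc && lvc->lpc &&
	       pbatcache_ctr(lvc->pc, params) &&
	       lbatpagecache_ctr(lvc->lpc, params) &&
	       lbatviewcache_realloc(lvc, 1);
}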

dm-compress/Makefile

@@ -1,5 +1,13 @@
# Makefile for dm-compress kernel module
dm-compress-y += \
util.o \
pbat.o \
lbatpage.o \
lbatview.o \
lbd.o \
compress.o
obj-m += dm-compress.o
ccflags-y := -I$(M)/../include

dm-compress/compress.c (new file)

@@ -0,0 +1,485 @@
/*
* Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>
// XXX: find a better name for this, something about storage vs. speed.
// XXX: should this be in cbd_params?
// #define CBD_DETECT_ZERO_BLOCKS
/*
* XXX
* If we don't use a workqueue, pblk_read() stalls. Why?
*/
#define USE_WORKQUEUE 1
struct compress;
/* per bio private data */
struct compress_io {
struct compress* c;
struct bio* bio;
struct work_struct work;
};
struct compress
{
struct dm_dev* dev;
struct cbd_params params;
struct lbdcache* lc;
struct mutex io_mutex;
struct workqueue_struct* io_workq;
bool io_failed;
};
static inline u64
blkdev_pblk_size(struct block_device *bdev)
{
return i_size_read(bdev->bd_inode) >> PBLK_SHIFT;
}
static inline u64
dm_target_pblk_size(struct dm_target* ti)
{
return ti->len >> (PBLK_SHIFT - SECTOR_SHIFT);
}
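/*
 * Unit check (illustrative numbers, not from the source): ti->len is in
 * 512-byte sectors and PBLK_SHIFT - SECTOR_SHIFT = 12 - 9 = 3, so a
 * 1 GiB target (ti->len == 2097152 sectors) maps to
 * 2097152 >> 3 == 262144 physical blocks of 4 KiB each.
 */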
/**************************************
* Main functions
**************************************/
static int
compress_open(struct compress* c, u64 dev_nr_pblks)
{
int err;
struct page* pblkpage;
u8 *pblkbuf;
struct page* iopagev[1];
struct cbd_header header;
u64 max_nr_zones;
pblkpage = cbd_alloc_page();
if (!pblkpage) {
return -ENOMEM;
}
pblkbuf = page_address(pblkpage);
iopagev[0] = pblkpage;
memset(&header, 0, sizeof(header));
header.params.priv = c->dev->bdev;
err = pblk_read_wait(&header.params, 0, 1, iopagev);
if (err) {
printk(KERN_ERR "%s: failed to read header\n", __func__);
cbd_free_page(pblkpage);
return err;
}
cbd_header_get(pblkbuf, &header);
cbd_free_page(pblkpage);
if (memcmp(header.magic, CBD_MAGIC, sizeof(header.magic)) != 0) {
printk(KERN_ERR "%s: bad magic\n", __func__);
err = -EINVAL;
goto out;
}
if (header.version_major != CBD_VERSION_MAJOR) {
printk(KERN_ERR "%s: bad version\n", __func__);
err = -EINVAL;
goto out;
}
if (header.version_minor != CBD_VERSION_MINOR) {
printk(KERN_ERR "%s: bad version\n", __func__);
err = -EINVAL;
goto out;
}
if (header.params.lblk_shift < LBLK_SHIFT_MIN ||
header.params.lblk_shift > LBLK_SHIFT_MAX) {
printk(KERN_ERR "%s: bad lblk_shift\n", __func__);
err = -EINVAL;
goto out;
}
/* XXX: validate minimum pblk using zone_off(max_zone+1) */
if (header.params.nr_pblk > dev_nr_pblks) {
printk(KERN_ERR "%s: bad nr_pblk\n", __func__);
err = -EINVAL;
goto out;
}
max_nr_zones = (dev_nr_pblks - CBD_HEADER_BLOCKS) / zone_len(&header.params);
if (header.params.nr_zones > max_nr_zones) {
printk(KERN_ERR "%s: bad nr_zones\n", __func__);
err = -EINVAL;
goto out;
}
/* XXX: validate lblk_per_zone */
printk(KERN_INFO "%s: parameters...\n", __func__);
printk(KERN_INFO " algorithm=%hu\n", (unsigned short)header.params.algorithm);
printk(KERN_INFO " compression=%hu\n", (unsigned short)header.params.compression);
printk(KERN_INFO " lblk_shift=%hu\n", (unsigned short)header.params.lblk_shift);
printk(KERN_INFO " nr_pblk=%lu\n", (unsigned long)header.params.nr_pblk);
printk(KERN_INFO " nr_zones=%u\n", (unsigned int)header.params.nr_zones);
printk(KERN_INFO " lblk_per_zone=%u\n", (unsigned int)header.params.lblk_per_zone);
memcpy(&c->params, &header.params, sizeof(header.params));
c->lc = kmalloc(lbdcache_size(), GFP_KERNEL);
if (!c->lc) {
err = -ENOMEM;
printk(KERN_ERR "Failed to alloc lbdcache\n");
goto out;
}
if (!lbdcache_ctr(c->lc, &c->params)) {
err = -ENOMEM;
printk(KERN_ERR "Failed to init logical block cache\n");
goto out;
}
mutex_init(&c->io_mutex);
c->io_workq = alloc_workqueue("compress_io", WQ_HIGHPRI | WQ_MEM_RECLAIM, 1);
if (!c->io_workq) {
printk(KERN_ERR "%s: failed to alloc io_workq\n", __func__);
err = -ENOMEM;
goto out;
}
c->io_failed = false;
out:
/* XXX: cleanup on error */
return err;
}
static struct lbd*
compress_lbdcache_swap(struct compress* c, u64 lblk, struct lbd* oldlbd)
{
struct lbd* lbd;
/* Get new data before putting old data to avoid flush */
lbd = lbdcache_get(c->lc, lblk);
if (!lbd) {
printk(KERN_ERR "%s: lbdcache_get failed\n", __func__);
lbdcache_put(c->lc, oldlbd);
return NULL;
}
if (lbd_read(lbd) != 0) {
printk(KERN_ERR "%s: lbd_read failed\n", __func__);
lbdcache_put(c->lc, lbd);
lbdcache_put(c->lc, oldlbd);
return NULL;
}
if (lbdcache_put(c->lc, oldlbd) != 0) {
printk(KERN_ERR "%s: failed to put oldlbd\n", __func__);
lbdcache_put(c->lc, lbd);
return NULL;
}
return lbd;
}
static int
compress_read(struct compress *c, struct bio *bio)
{
struct lbd* lbd = NULL;
struct bio_vec bv;
struct bvec_iter iter;
int ret;
u32 lblk_per_sector = lblk_per_pblk(&c->params) * PBLK_PER_SECTOR;
u64 last_lblk = LBLK_NONE;
bio_for_each_segment(bv, bio, iter) {
sector_t lblk = iter.bi_sector / lblk_per_sector;
u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
unsigned long flags;
char* data;
if (lblk != last_lblk) {
lbd = compress_lbdcache_swap(c, lblk, lbd);
if (!lbd) {
return -EIO;
}
last_lblk = lblk;
}
data = bvec_kmap_irq(&bv, &flags);
lbd_data_read(lbd, lblk_off, bv.bv_len, data);
bvec_kunmap_irq(data, &flags);
}
ret = lbdcache_put(c->lc, lbd);
return ret;
}
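/*
 * Worked example for the mapping above (illustrative numbers). Despite
 * its name, lblk_per_sector counts sectors per logical block (and
 * PBLK_PER_SECTOR counts sectors per pblk, i.e. 8). With
 * lblk_per_pblk() == 4 (16 KiB logical blocks), lblk_per_sector is
 * 4 * 8 = 32, so bi_sector 100 maps to lblk = 100 / 32 = 3 with
 * byte offset lblk_off = (100 - 3 * 32) * 512 = 2048.
 */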
static int
compress_write(struct compress *c, struct bio *bio)
{
struct lbd* lbd = NULL;
struct bio_vec bv;
struct bvec_iter iter;
int ret;
u32 lblk_per_sector = lblk_per_pblk(&c->params) * PBLK_PER_SECTOR;
u64 last_lblk = LBLK_NONE;
bio_for_each_segment(bv, bio, iter) {
sector_t lblk = iter.bi_sector / lblk_per_sector;
u32 lblk_off = (iter.bi_sector - lblk * lblk_per_sector) * SECTOR_SIZE;
unsigned long flags;
char* data;
if (lblk != last_lblk) {
lbd = compress_lbdcache_swap(c, lblk, lbd);
if (!lbd) {
return -EIO;
}
last_lblk = lblk;
}
data = bvec_kmap_irq(&bv, &flags);
lbd_data_write(lbd, lblk_off, bv.bv_len, data);
bvec_kunmap_irq(data, &flags);
}
ret = lbdcache_put(c->lc, lbd);
return ret;
}
static void
compress_io(struct compress_io* cio)
{
int ret;
struct compress* c = cio->c;
struct bio* bio = cio->bio;
mutex_lock(&c->io_mutex);
switch (bio_op(bio)) {
case REQ_OP_READ:
ret = compress_read(c, bio);
break;
case REQ_OP_WRITE:
ret = compress_write(c, bio);
break;
default:
printk(KERN_ERR "%s: unknown op in bio: %u\n", __func__, bio_op(bio));
ret = -EINVAL;
}
mutex_unlock(&c->io_mutex);
if (ret) {
printk(KERN_ERR "%s: failed, ret=%d\n", __func__, ret);
}
bio->bi_status = (ret == 0 ? BLK_STS_OK : BLK_STS_IOERR); /* XXX */
bio_endio(bio);
}
#ifdef USE_WORKQUEUE
static void
compress_io_work(struct work_struct* work)
{
struct compress_io* cio = container_of(work, struct compress_io, work);
compress_io(cio);
}
#endif
/*
* Usage:
* echo "<start_sector> <end_sector> compress <backing_device> <args...>" | dmsetup create <compress_name>
* Where:
* start_sector is the starting sector of the backing device.
* end_sector is the ending sector of the backing device.
* compress is the name of this module.
backing_device is the name of the backing device.
* args is:
* create [lblk_shift=#]
* open
* compress_name is the name of the compress device.
*/
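/*
 * Example invocation (illustrative; assumes /dev/loop0 is a 1 GiB
 * backing device already formatted with the cbd header):
 *
 *   echo "0 2097152 compress /dev/loop0" | dmsetup create cbd0
 *
 * where 2097152 is the device length in 512-byte sectors.
 */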
static int
compress_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
int err;
unsigned int argn;
struct compress *c = NULL;
u64 dev_nr_pblks;
printk(KERN_INFO "%s: enter: argc=%u\n", __func__, argc);
for (argn = 0; argn < argc; ++argn) {
printk(KERN_INFO " ... arg[%u]=\"%s\"\n", argn, argv[argn]);
}
if (argc == 0) {
ti->error = "No device specified";
return -EINVAL;
}
argn = 1;
while (argn < argc) {
const char* arg = argv[argn++];
const char* eq = strchr(arg, '=');
if (!eq) {
ti->error = "Invalid argument format";
return -EINVAL;
}
#if 0
if (!memcmp(arg, "verbose", 7)) {
err = kstrtouint(eq + 1, 0, &verbose_level);
if (err) {
ti->error = "Failed to parse verbose";
return -EINVAL;
}
continue;
}
#endif
ti->error = "Unrecognized argument";
return -EINVAL;
}
c = kzalloc(sizeof(struct compress), GFP_KERNEL);
if (!c) {
ti->error = "Failed to allocate target";
return -ENOMEM;
}
if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &c->dev)) {
ti->error = "Device lookup failed";
kfree(c);
return -EINVAL;
}
ti->private = c;
dev_nr_pblks = dm_target_pblk_size(ti);
if (get_order(dev_nr_pblks) >= 48) {
ti->error = "Device too large";
kfree(c);
return -EINVAL;
}
ti->per_io_data_size = ALIGN(sizeof(struct compress_io), ARCH_KMALLOC_MINALIGN);
err = compress_open(c, dev_nr_pblks);
if (err) {
dm_put_device(ti, c->dev);
kfree(c);
return err;
}
printk(KERN_INFO "%s: success\n", __func__);
return 0;
}
static void
compress_dtr(struct dm_target *ti)
{
struct compress *c;
printk(KERN_INFO "%s: enter\n", __func__);
c = ti->private;
lbdcache_dtr(c->lc);
kfree(c->lc);
if (c->io_workq) {
destroy_workqueue(c->io_workq);
}
dm_put_device(ti, c->dev);
kfree(c);
}
static int
compress_map(struct dm_target *ti, struct bio *bio)
{
struct compress *c = ti->private;
struct compress_io *cio;
if (c->io_failed) {
return DM_MAPIO_KILL;
}
/* from dm-crypt.c */
if (unlikely(bio->bi_opf & REQ_PREFLUSH || bio_op(bio) == REQ_OP_DISCARD)) {
bio_set_dev(bio, c->dev->bdev);
if (bio_sectors(bio)) {
/* XXX: remap to underlying data */
}
return DM_MAPIO_REMAPPED;
}
/* Synchronous I/O operations deadlock, so queue them. */
/* XXX: clone the bio? */
cio = dm_per_bio_data(bio, ti->per_io_data_size);
cio->c = c;
cio->bio = bio;
#ifdef USE_WORKQUEUE
INIT_WORK(&cio->work, compress_io_work);
queue_work(c->io_workq, &cio->work);
#else
compress_io(cio);
#endif
return DM_MAPIO_SUBMITTED;
}
static struct target_type compress_target = {
.name = "compress",
.version = { 1, 0, 0 },
.module = THIS_MODULE,
.ctr = compress_ctr,
.dtr = compress_dtr,
.map = compress_map,
};
static int __init
dm_compress_init(void)
{
int res;
res = dm_register_target(&compress_target);
if (res < 0) {
printk(KERN_ERR "Failed to register dm-compress: %d\n", res);
}
return res;
}
static void __exit
dm_compress_exit(void)
{
dm_unregister_target(&compress_target);
}
module_init(dm_compress_init);
module_exit(dm_compress_exit);
MODULE_DESCRIPTION("compress target for transparent compression");
MODULE_AUTHOR("Tom Marshall <tdm.code@gmail.com>");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");

(File diff suppressed because it is too large.)

dm-compress/lbatpage.c (new file)

@@ -0,0 +1,315 @@
/*
* Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>
struct lbatpage {
u64 pblk;
struct mutex reflock;
unsigned int ref;
struct mutex lock;
enum cache_state state;
struct cbd_params* params;
struct page* page;
u8* buf;
bool dirty;
};
bool
lbatpage_ctr(struct lbatpage* lp, struct cbd_params* params)
{
lp->pblk = PBLK_NONE;
mutex_init(&lp->reflock);
lp->ref = 0;
mutex_init(&lp->lock);
lp->state = CACHE_STATE_UNCACHED;
lp->params = params;
lp->page = cbd_alloc_page();
if (!lp->page) {
return false;
}
lp->buf = page_address(lp->page);
lp->dirty = false;
return true;
}
void
lbatpage_dtr(struct lbatpage* lp)
{
lp->buf = NULL;
cbd_free_page(lp->page);
lp->page = NULL;
}
static void
lbatpage_flush_endio(struct bio* bio)
{
int ret;
cbd_free_page(bio->bi_io_vec[0].bv_page);
ret = pblk_endio(bio);
if (ret) {
/* XXX: ...? */
printk(KERN_ERR "%s: I/O failed\n", __func__);
}
}
int
lbatpage_flush(struct lbatpage* lp)
{
int ret = 0;
struct page* iopagev[1];
mutex_lock(&lp->lock);
if (lp->state != CACHE_STATE_DIRTY) {
goto out;
}
iopagev[0] = cbd_alloc_page();
if (!iopagev[0]) {
printk(KERN_ERR "%s: out of memory\n", __func__);
ret = -ENOMEM;
goto out;
}
memcpy(page_address(iopagev[0]), lp->buf, PAGE_SIZE);
pblk_write(lp->params, lp->pblk, 1, iopagev, lbatpage_flush_endio, lp);
lp->state = CACHE_STATE_CLEAN;
out:
mutex_unlock(&lp->lock);
return ret;
}
int
lbatpage_read(struct lbatpage* lp)
{
int ret = 0;
struct page* pagev[1];
ret = lbatpage_flush(lp);
if (ret) {
return ret;
}
mutex_lock(&lp->lock);
if (lp->state == CACHE_STATE_CLEAN) {
goto out;
}
pagev[0] = lp->page;
ret = pblk_read_wait(lp->params, lp->pblk, 1, pagev);
if (ret) {
goto out;
}
lp->state = CACHE_STATE_CLEAN;
out:
mutex_unlock(&lp->lock);
return ret;
}
void
lbatpage_reset(struct lbatpage* lp, u64 pblk)
{
BUG_ON(lp->pblk == pblk);
lp->pblk = pblk;
lp->state = CACHE_STATE_UNCACHED;
}
u8*
lbatpage_get_buf(struct lbatpage* lp, bool rw)
{
mutex_lock(&lp->lock);
if (rw) {
lp->state = CACHE_STATE_DIRTY;
}
return lp->buf;
}
void
lbatpage_put_buf(struct lbatpage* lp)
{
mutex_unlock(&lp->lock);
}
struct lbatpagecache {
struct mutex lock;
struct cbd_params* params;
unsigned int len;
struct lbatpage** cache;
};
size_t
lbatpagecache_size(void)
{
return sizeof(struct lbatpagecache);
}
static bool
lbatpagecache_realloc(struct lbatpagecache* lpc, unsigned int len)
{
struct lbatpage** cache;
unsigned int n;
struct lbatpage* lp;
cache = kzalloc(len * sizeof(struct lbatpage*), GFP_KERNEL);
if (!cache) {
return false;
}
n = 0;
if (lpc->len) {
memcpy(cache, lpc->cache, lpc->len * sizeof(struct lbatpage*));
n = lpc->len;
kfree(lpc->cache);
}
lpc->len = len;
lpc->cache = cache;
while (n < len) {
lp = kmalloc(sizeof(struct lbatpage), GFP_KERNEL);
if (!lp) {
return false;
}
cache[n++] = lp;
if (!lbatpage_ctr(lp, lpc->params)) {
return false;
}
}
return true;
}
bool
lbatpagecache_ctr(struct lbatpagecache* lpc,
struct cbd_params* params)
{
memset(lpc, 0, sizeof(struct lbatpagecache));
mutex_init(&lpc->lock);
lpc->params = params;
return lbatpagecache_realloc(lpc, 1);
}
void
lbatpagecache_dtr(struct lbatpagecache* lpc)
{
unsigned int n;
struct lbatpage* lp;
for (n = 0; n < lpc->len; ++n) {
lp = lpc->cache[n];
if (!lp) {
continue;
}
lbatpage_dtr(lp);
if (lp->ref) {
printk(KERN_ERR "%s: lbatpage ref leak: n=%u ref=%u\n", __func__, n, lp->ref);
}
kfree(lp);
}
kfree(lpc->cache);
lpc->cache = NULL;
lpc->len = 0;
lpc->params = NULL;
}
struct lbatpage*
lbatpagecache_get(struct lbatpagecache* lpc, u64 pblk)
{
unsigned int n;
struct lbatpage* lp;
mutex_lock(&lpc->lock);
for (n = 0; n < lpc->len; ++n) {
lp = lpc->cache[n];
mutex_lock(&lp->reflock);
if (lp->pblk == pblk) {
++lp->ref;
mutex_unlock(&lp->reflock);
goto out;
}
mutex_unlock(&lp->reflock);
}
for (n = 0; n < lpc->len; ++n) {
lp = lpc->cache[n];
mutex_lock(&lp->reflock);
if (lp->pblk == PBLK_NONE) {
goto found;
}
mutex_unlock(&lp->reflock);
}
for (n = 0; n < lpc->len; ++n) {
lp = lpc->cache[n];
mutex_lock(&lp->reflock);
if (lp->ref == 0) {
goto found;
}
mutex_unlock(&lp->reflock);
}
printk(KERN_INFO "%s: all pages in use, realloc...\n", __func__);
n = lpc->len;
if (!lbatpagecache_realloc(lpc, lpc->len * 2)) {
printk(KERN_ERR "%s: realloc failed\n", __func__);
lp = NULL;
goto out;
}
printk(KERN_INFO "%s: realloc done, using n=%u\n", __func__, n);
lp = lpc->cache[n];
mutex_lock(&lp->reflock);
found:
lbatpage_reset(lp, pblk);
lp->ref = 1;
mutex_unlock(&lp->reflock);
out:
mutex_unlock(&lpc->lock);
return lp;
}
int
lbatpagecache_put(struct lbatpagecache* lpc, struct lbatpage* lp)
{
int ret = 0;
if (!lp) {
return 0;
}
mutex_lock(&lpc->lock);
mutex_lock(&lp->reflock);
if (--lp->ref == 0) {
ret = lbatpage_flush(lp);
if (ret) {
printk(KERN_ERR "%s: lbatpage_flush failed\n", __func__);
}
}
mutex_unlock(&lp->reflock);
mutex_unlock(&lpc->lock);
return ret;
}

dm-compress/lbatview.c (new file)

@@ -0,0 +1,681 @@
/*
* Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>
struct lbatview {
u64 pblk;
struct mutex reflock;
unsigned int ref;
struct mutex lock;
enum cache_state state;
struct cbd_params* params;
struct pbatcache* pbatcache;
struct pbat* pbat;
struct lbatpagecache* lpc;
struct lbatpage* pages[2];
};
bool
lbatview_ctr(struct lbatview* lv,
struct cbd_params* params,
struct pbatcache* pbatcache,
struct lbatpagecache* lpc)
{
memset(lv, 0, sizeof(struct lbatview));
lv->pblk = PBLK_NONE;
mutex_init(&lv->reflock);
lv->ref = 0;
mutex_init(&lv->lock);
lv->state = CACHE_STATE_UNCACHED;
lv->params = params;
lv->pbatcache = pbatcache;
lv->pbat = NULL;
lv->lpc = lpc;
lv->pages[0] = lv->pages[1] = NULL;
return true;
}
void
lbatview_dtr(struct lbatview* lv)
{
if (pbatcache_put(lv->pbatcache, lv->pbat) != 0) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
}
lv->pbat = NULL;
lbatpagecache_put(lv->lpc, lv->pages[0]);
lbatpagecache_put(lv->lpc, lv->pages[1]);
lv->pages[0] = lv->pages[1] = NULL;
lv->lpc = NULL;
}
int
lbatview_flush(struct lbatview* lv)
{
int ret = 0;
mutex_lock(&lv->lock);
if (lv->state != CACHE_STATE_DIRTY) {
goto out;
}
BUG_ON(!lv->pages[0]);
BUG_ON(lv->pblk == PBLK_NONE);
if (lv->pages[0]) {
ret = lbatpage_flush(lv->pages[0]);
if (ret) {
goto out;
}
}
if (lv->pages[1]) {
ret = lbatpage_flush(lv->pages[1]);
if (ret) {
goto out;
}
}
lv->state = CACHE_STATE_CLEAN;
out:
mutex_unlock(&lv->lock);
return ret;
}
int
lbatview_read(struct lbatview* lv)
{
int ret = 0;
ret = lbatview_flush(lv);
if (ret) {
return ret;
}
mutex_lock(&lv->lock);
if (lv->pages[0]) {
ret = lbatpage_read(lv->pages[0]);
if (ret) {
goto out;
}
}
if (lv->pages[1]) {
ret = lbatpage_read(lv->pages[1]);
if (ret) {
goto out;
}
}
lv->state = CACHE_STATE_CLEAN;
out:
mutex_unlock(&lv->lock);
return ret;
}
bool
lbatview_reset(struct lbatview* lv, u64 pblk, u32 count)
{
bool ret = true;
u32 zone = (pblk - CBD_HEADER_BLOCKS) / zone_len(lv->params);
BUG_ON(lv->pblk == pblk);
lv->pblk = pblk;
lv->state = CACHE_STATE_UNCACHED;
if (pbatcache_put(lv->pbatcache, lv->pbat) != 0) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
ret = false;
}
lv->pbat = pbatcache_get(lv->pbatcache, zone);
if (!lv->pbat) {
ret = false;
}
if (lbatpagecache_put(lv->lpc, lv->pages[0]) != 0) {
ret = false;
}
lv->pages[0] = NULL;
if (lbatpagecache_put(lv->lpc, lv->pages[1]) != 0) {
ret = false;
}
lv->pages[1] = NULL;
if (count > 0) {
lv->pages[0] = lbatpagecache_get(lv->lpc, pblk + 0);
}
if (count > 1) {
lv->pages[1] = lbatpagecache_get(lv->lpc, pblk + 1);
}
return ret;
}
static u64
lbatview_alloc_pblk(struct lbatview* lv)
{
int ret = 0;
u32 zone = (lv->pblk - CBD_HEADER_BLOCKS) / zone_len(lv->params);
u64 pblk;
u32 zone_off;
struct pbat* pbat;
if (!lv->pbat) {
printk(KERN_ERR "%s: *** lv->pbat is NULL\n", __func__);
return PBLK_NONE;
}
pblk = pbat_alloc(lv->pbat);
if (pblk != PBLK_NONE) {
return pblk;
}
printk(KERN_INFO "%s: alloc failed for current zone\n", __func__);
ret = pbatcache_put(lv->pbatcache, lv->pbat);
if (ret) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
return PBLK_NONE;
}
lv->pbat = NULL;
for (zone_off = 1;
zone_off <= zone || zone + zone_off < lv->params->nr_zones;
++zone_off) {
if (zone_off <= zone) {
pbat = pbatcache_get(lv->pbatcache, zone - zone_off);
if (!pbat) {
printk(KERN_ERR "%s: pbatcache_get failed\n", __func__);
return PBLK_NONE;
}
if (pbat_read(pbat) != 0) {
printk(KERN_ERR "%s: pbat_read failed\n", __func__);
return PBLK_NONE;
}
pblk = pbat_alloc(pbat);
if (pblk != PBLK_NONE) {
printk(KERN_INFO "%s: using zone %u, alloc=%lu\n", __func__, (zone - zone_off), (unsigned long)pblk);
lv->pbat = pbat;
return pblk;
}
ret = pbatcache_put(lv->pbatcache, pbat);
if (ret) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
return PBLK_NONE;
}
}
if (zone + zone_off < lv->params->nr_zones) {
pbat = pbatcache_get(lv->pbatcache, zone + zone_off);
if (!pbat) {
printk(KERN_ERR "%s: pbatcache_get failed\n", __func__);
return PBLK_NONE;
}
if (pbat_read(pbat) != 0) {
printk(KERN_ERR "%s: pbat_read failed\n", __func__);
return PBLK_NONE;
}
pblk = pbat_alloc(pbat);
if (pblk != PBLK_NONE) {
printk(KERN_INFO "%s: using zone %u, alloc=%lu\n", __func__, (zone + zone_off), (unsigned long)pblk);
lv->pbat = pbat;
return pblk;
}
ret = pbatcache_put(lv->pbatcache, pbat);
if (ret) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
return PBLK_NONE;
}
}
}
printk(KERN_ERR "%s: fail, all zones full\n", __func__);
return PBLK_NONE;
}
static int
lbatview_free_pblk(struct lbatview* lv, u64 pblk)
{
int ret = 0;
u32 zone = (lv->pblk - CBD_HEADER_BLOCKS) / zone_len(lv->params);
u32 pblk_zone;
struct pbat* pbat;
if (!lv->pbat) {
printk(KERN_ERR "%s: *** lv->pbat is NULL\n", __func__);
return -EINVAL;
}
if (pblk < CBD_HEADER_BLOCKS) {
printk(KERN_ERR "%s: pblk index is in header\n", __func__);
return -EINVAL;
}
pblk_zone = (pblk - CBD_HEADER_BLOCKS) / zone_len(lv->params);
if (pblk_zone >= lv->params->nr_zones) {
printk(KERN_ERR "%s: pblk zone out of bounds\n", __func__);
return -EINVAL;
}
pbat = pbatcache_get(lv->pbatcache, pblk_zone);
if (!pbat) {
printk(KERN_ERR "%s: pbatcache_get failed\n", __func__);
return -EINVAL;
}
ret = pbat_read(pbat);
if (ret != 0) {
printk(KERN_ERR "%s: pbat_read failed\n", __func__);
return ret;
}
ret = pbat_free(pbat, pblk);
if (pblk_zone == zone && pbat_zone(lv->pbat) != zone) {
printk(KERN_INFO "%s: freed block %lu in zone %u switching back\n", __func__, (unsigned long)pblk, zone);
ret = pbatcache_put(lv->pbatcache, lv->pbat);
if (ret) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
}
lv->pbat = pbat;
}
else {
ret = pbatcache_put(lv->pbatcache, pbat);
if (ret) {
printk(KERN_ERR "%s: pbatcache_put failed\n", __func__);
}
}
return ret;
}
static u32
lbatview_elem_off(struct lbatview* lv, u64 lblk)
{
u32 lv_zone = (lv->pblk - CBD_HEADER_BLOCKS) / zone_len(lv->params);
/* The relative lblk in the zone. */
u32 zone_rel_lblk = lblk - (lv_zone * lv->params->lblk_per_zone);
/* The offset of the element in the (full) lbat. */
u32 lbat_elem_off = zone_rel_lblk * lba_len(lv->params);
/* The offset of the first view pblk. */
u32 lbatview_off = PBLK_SIZE * (lv->pblk - lbat_off(lv->params, lv_zone));
return lbat_elem_off - lbatview_off;
}
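/*
 * Example with made-up numbers: if lblk_per_zone == 1024 and
 * lba_len() == 10, a view starting at the second lbat pblk of zone 0
 * (lv->pblk == lbat_off(params, 0) + 1) has lbatview_off == 4096.
 * For zone-relative lblk 500, lbat_elem_off = 500 * 10 = 5000, so the
 * element starts at byte 5000 - 4096 = 904 of pages[0].
 */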
static void
lbatview_rmem(struct lbatview* lv, u32 off, u32 len, void* buf)
{
/* XXX: Convert below to a BUG_ON */
if (off + len > 2 * PAGE_SIZE) {
printk(KERN_ERR "%s: *** out of bounds\n", __func__);
return;
}
if (off < PAGE_SIZE) {
if (!lv->pages[0]) {
printk(KERN_ERR "%s *** no page0\n", __func__);
return;
}
}
if (off + len > PAGE_SIZE) {
if (!lv->pages[1]) {
printk(KERN_ERR "%s *** no page1\n", __func__);
return;
}
}
if (off < PAGE_SIZE && off + len > PAGE_SIZE) {
u32 len0 = PAGE_SIZE - off;
u8* pagebuf0 = lbatpage_get_buf(lv->pages[0], false);
u8* pagebuf1 = lbatpage_get_buf(lv->pages[1], false);
memcpy(buf, pagebuf0 + off, len0);
memcpy(buf + len0, pagebuf1, len - len0);
lbatpage_put_buf(lv->pages[1]);
lbatpage_put_buf(lv->pages[0]);
}
else {
u32 bufidx = off / PAGE_SIZE;
u32 bufoff = off % PAGE_SIZE;
u8* pagebuf = lbatpage_get_buf(lv->pages[bufidx], false);
memcpy(buf, pagebuf + bufoff, len);
lbatpage_put_buf(lv->pages[bufidx]);
}
}
static void
lbatview_wmem(struct lbatview* lv, u32 off, u32 len, void* buf)
{
/* XXX: Convert below to a BUG_ON */
if (off + len > 2 * PAGE_SIZE) {
printk(KERN_ERR "%s: *** out of bounds\n", __func__);
return;
}
if (off < PAGE_SIZE) {
if (!lv->pages[0]) {
printk(KERN_ERR "%s *** no page0\n", __func__);
return;
}
}
if (off + len > PAGE_SIZE) {
if (!lv->pages[1]) {
printk(KERN_ERR "%s *** no page1\n", __func__);
return;
}
}
if (off < PAGE_SIZE && off + len > PAGE_SIZE) {
u32 len0 = PAGE_SIZE - off;
u8* pagebuf0 = lbatpage_get_buf(lv->pages[0], true);
u8* pagebuf1 = lbatpage_get_buf(lv->pages[1], true);
memcpy(pagebuf0 + off, buf, len0);
memcpy(pagebuf1, buf + len0, len - len0);
lbatpage_put_buf(lv->pages[1]);
lbatpage_put_buf(lv->pages[0]);
}
else {
u32 bufidx = off / PAGE_SIZE;
u32 bufoff = off % PAGE_SIZE;
u8* pagebuf = lbatpage_get_buf(lv->pages[bufidx], true);
memcpy(pagebuf + bufoff, buf, len);
lbatpage_put_buf(lv->pages[bufidx]);
}
lv->state = CACHE_STATE_DIRTY;
}
int
lbatview_elem_realloc(struct lbatview* lv, u64 lblk, u32 len)
{
int ret = 0;
u32 off;
u32 n;
u64 pblk;
u32 elem_len_size = (lv->params->lblk_shift + PBLK_SHIFT > 16) ? 4 : 2;
u32 elem_pblk_size = (lv->params->nr_pblk <= 0xffff ? 2 :
(lv->params->nr_pblk <= 0xffffffff ? 4 : 6));
u32 elem_lelen;
u64 elem_lepblk;
mutex_lock(&lv->lock);
off = lbatview_elem_off(lv, lblk);
elem_lelen = __cpu_to_le32(len);
lbatview_wmem(lv, off, elem_len_size, &elem_lelen);
off += elem_len_size;
if (len == CBD_UNCOMPRESSED) {
len = PBLK_SIZE * lblk_per_pblk(lv->params);
}
for (n = 0; n < lblk_per_pblk(lv->params); ++n, off += elem_pblk_size) {
elem_lepblk = 0;
lbatview_rmem(lv, off, elem_pblk_size, &elem_lepblk);
pblk = __le64_to_cpu(elem_lepblk);
if (len > PBLK_SIZE * n) {
if (pblk == 0) {
pblk = lbatview_alloc_pblk(lv);
if (pblk == PBLK_NONE) {
printk(KERN_ERR " lbat_alloc_pblk failed\n");
ret = -ENOSPC;
goto out; /* XXX: undo */
}
elem_lepblk = __cpu_to_le64(pblk);
lbatview_wmem(lv, off, elem_pblk_size, &elem_lepblk);
}
}
else {
if (pblk != 0) {
elem_lepblk = 0;
lbatview_wmem(lv, off, elem_pblk_size, &elem_lepblk);
ret = lbatview_free_pblk(lv, pblk);
if (ret) {
printk(KERN_ERR " lbat_free_pblk failed\n");
goto out; /* XXX: undo */
}
}
}
}
out:
mutex_unlock(&lv->lock);
return ret;
}
u32
lbatview_elem_len(struct lbatview* lv, u64 lblk)
{
u32 off;
u32 elem_len_size = (lv->params->lblk_shift + PBLK_SHIFT > 16) ? 4 : 2;
u32 elem_lelen;
mutex_lock(&lv->lock);
off = lbatview_elem_off(lv, lblk);
elem_lelen = 0;
lbatview_rmem(lv, off, elem_len_size, &elem_lelen);
mutex_unlock(&lv->lock);
return __le32_to_cpu(elem_lelen);
}
u64
lbatview_elem_pblk(struct lbatview* lv, u64 lblk, u32 idx)
{
u32 off;
u32 elem_len_size = (lv->params->lblk_shift + PBLK_SHIFT > 16) ? 4 : 2;
u32 elem_pblk_size = (lv->params->nr_pblk <= 0xffff ? 2 :
(lv->params->nr_pblk <= 0xffffffff ? 4 : 6));
u64 elem_lepblk;
mutex_lock(&lv->lock);
off = lbatview_elem_off(lv, lblk) +
elem_len_size + idx * elem_pblk_size;
elem_lepblk = 0;
lbatview_rmem(lv, off, elem_pblk_size, &elem_lepblk);
mutex_unlock(&lv->lock);
return __le64_to_cpu(elem_lepblk);
}
struct lbatviewcache {
struct mutex lock;
struct cbd_params* params;
struct pbatcache* pc;
struct lbatpagecache* lpc;
unsigned int len;
struct lbatview** cache;
};
size_t
lbatviewcache_size(void)
{
return sizeof(struct lbatviewcache);
}
static bool
lbatviewcache_realloc(struct lbatviewcache* lvc, unsigned int len)
{
struct lbatview** cache;
unsigned int n;
struct lbatview* lv;
cache = kzalloc(len * sizeof(struct lbatview*), GFP_KERNEL);
if (!cache) {
return false;
}
n = 0;
if (lvc->len) {
memcpy(cache, lvc->cache, lvc->len * sizeof(struct lbatview*));
n = lvc->len;
kfree(lvc->cache);
}
lvc->len = len;
lvc->cache = cache;
while (n < len) {
lv = kmalloc(sizeof(struct lbatview), GFP_KERNEL);
if (!lv) {
return false;
}
cache[n++] = lv;
if (!lbatview_ctr(lv, lvc->params, lvc->pc, lvc->lpc)) {
return false;
}
}
return true;
}
bool
lbatviewcache_ctr(struct lbatviewcache* lvc,
struct cbd_params* params)
{
memset(lvc, 0, sizeof(struct lbatviewcache));
mutex_init(&lvc->lock);
lvc->params = params;
lvc->pc = kmalloc(pbatcache_size(), GFP_KERNEL);
if (!lvc->pc) {
return false;
}
if (!pbatcache_ctr(lvc->pc, params)) {
return false;
}
lvc->lpc = kmalloc(lbatpagecache_size(), GFP_KERNEL);
if (!lvc->lpc) {
return false;
}
if (!lbatpagecache_ctr(lvc->lpc, params)) {
return false;
}
return lbatviewcache_realloc(lvc, 1);
}
void
lbatviewcache_dtr(struct lbatviewcache* lvc)
{
unsigned int n;
struct lbatview* lv;
for (n = 0; n < lvc->len; ++n) {
lv = lvc->cache[n];
if (!lv) {
continue;
}
lbatview_dtr(lv);
if (lv->ref) {
printk(KERN_ERR "%s: lbatview ref leak: n=%u ref=%u\n", __func__, n, lv->ref);
}
kfree(lv);
}
kfree(lvc->cache);
lvc->cache = NULL;
lvc->len = 0;
lbatpagecache_dtr(lvc->lpc);
kfree(lvc->lpc);
lvc->lpc = NULL;
pbatcache_dtr(lvc->pc);
kfree(lvc->pc);
lvc->pc = NULL;
lvc->params = NULL;
}
struct lbatview*
lbatviewcache_get(struct lbatviewcache* lvc, u64 lblk)
{
u32 zone;
u64 zone_lbat_pblk;
u32 rel_lblk;
u32 lbat_offset;
u32 rel_pblk;
u64 pblk;
u32 count;
unsigned int n;
struct lbatview* lv;
zone = lblk / lvc->params->lblk_per_zone;
zone_lbat_pblk = lbat_off(lvc->params, zone);
rel_lblk = lblk - lvc->params->lblk_per_zone * zone;
lbat_offset = rel_lblk * lba_len(lvc->params);
rel_pblk = lbat_offset / PBLK_SIZE;
pblk = zone_lbat_pblk + rel_pblk;
count = (rel_pblk == lbat_len(lvc->params) - 1) ? 1 : 2;
mutex_lock(&lvc->lock);
for (n = 0; n < lvc->len; ++n) {
lv = lvc->cache[n];
mutex_lock(&lv->reflock);
if (lv->pblk == pblk) {
++lv->ref;
mutex_unlock(&lv->reflock);
goto out;
}
mutex_unlock(&lv->reflock);
}
for (n = 0; n < lvc->len; ++n) {
lv = lvc->cache[n];
mutex_lock(&lv->reflock);
if (lv->pblk == PBLK_NONE) {
goto found;
}
mutex_unlock(&lv->reflock);
}
for (n = 0; n < lvc->len; ++n) {
lv = lvc->cache[n];
mutex_lock(&lv->reflock);
if (lv->ref == 0) {
goto found;
}
mutex_unlock(&lv->reflock);
}
printk(KERN_INFO "%s: all objects in use, realloc...\n", __func__);
n = lvc->len;
if (!lbatviewcache_realloc(lvc, lvc->len * 2)) {
printk(KERN_ERR "%s: realloc failed\n", __func__);
lv = NULL;
goto out;
}
lv = lvc->cache[n];
mutex_lock(&lv->reflock);
found:
if (!lbatview_reset(lv, pblk, count)) {
mutex_unlock(&lv->reflock);
printk(KERN_ERR "%s: lbatview_reset failed\n", __func__);
lv = NULL;
goto out;
}
lv->ref = 1;
mutex_unlock(&lv->reflock);
out:
mutex_unlock(&lvc->lock);
return lv;
}
int
lbatviewcache_put(struct lbatviewcache* lvc, struct lbatview* lv)
{
int ret = 0;
if (!lv) {
return 0;
}
mutex_lock(&lvc->lock);
mutex_lock(&lv->reflock);
if (--lv->ref == 0) {
ret = lbatview_flush(lv);
if (ret) {
printk(KERN_ERR "%s: lbatview_flush failed\n", __func__);
}
}
mutex_unlock(&lv->reflock);
mutex_unlock(&lvc->lock);
return ret;
}

dm-compress/lbd.c (new file)

@@ -0,0 +1,548 @@
/*
* Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>
struct lbd {
u64 lblk;
struct mutex reflock;
unsigned int ref;
struct mutex lock;
enum cache_state state;
struct cbd_params* params;
struct lbatviewcache* lvc;
struct lbatview* lv;
u8* lz4_wrkmem;
struct page* lz4_cpages;
u8* lz4_cbuf;
struct page* pages;
u8* buf;
};
/*
* Allocating lz4_wrkmem percpu:
*
* If the alloc is per-instance, it would need to be allocated in compress.c
* and passed around. The easiest way to pass it around is likely to make it
* part of a struct. We can't use struct compress because that is private.
* So we would need to create a struct (say, compress_percpu).
*
* If the alloc is global, we can just declare it file-local. But it would
* need to be the largest possible size. Which means we probably don't want
* to use alloc_percpu_gfp() directly, because 1mb chunks are probably not
* that common. So suppose we allocate a percpu vector of page ptrs.
*
* #define COMPRESS_MAX_INPUT_SIZE (1 << LBLK_SHIFT_MAX)
* #define COMPRESS_LZ4_BOUND LZ4_COMPRESSBOUND(COMPRESS_MAX_INPUT_SIZE)
* #define WRKMEM_PAGES DIV_ROUND_UP(COMPRESS_LZ4_BOUND, PAGE_SIZE)
* typedef struct page*[WRKMEM_PAGES] lz4_wrkmem_pagev_t;
*
* g_lz4_wrkmem = alloc_percpu_gfp(lz4_wrkmem_pagev_t, GFP_IO);
*
* That's not bad at all. But how do we alloc (and free) the actual pages?
*
* pagev = get_cpu_var(g_lz4_wrkmem);
* put_cpu_var(pagev);
*
* free_percpu(g_lz4_wrkmem);
*/
static inline bool
lblk_is_zeros(struct cbd_params* params, struct lbd* lbd)
{
#ifdef CBD_DETECT_ZERO_BLOCKS
u32 off;
u32 len = PBLK_SIZE * lblk_per_pblk(params);
for (off = 0; off < len; ++off) {
if (lbd->buf[off]) {
return false;
}
}
return true;
#else
return false;
#endif
}
/*
* Compress dc->lblk into dc->lz4_cbuf
*
* Returns number of bytes in cbuf or 0 for failure.
*/
static size_t
lblk_compress(struct lbd* lbd)
{
int ret;
void *dbuf = lbd->buf;
u32 dlen = PBLK_SIZE * lblk_per_pblk(lbd->params);
void *cbuf = lbd->lz4_cbuf;
u32 clen = PBLK_SIZE * lblk_per_pblk(lbd->params);
ret = LZ4_compress_default(dbuf, cbuf, dlen, clen, lbd->lz4_wrkmem);
if (ret <= 0) {
return 0;
}
return (size_t)ret;
}
/*
* Decompress dc->lz4_cbuf of size clen into dc->lblk
*
* Returns 0 for success, <0 for failure.
*/
static int
lblk_decompress(struct lbd* lbd, u32 clen)
{
int ret;
void *cbuf = lbd->lz4_cbuf;
void *dbuf = lbd->buf;
u32 dlen = PBLK_SIZE * lblk_per_pblk(lbd->params);
ret = LZ4_decompress_safe(cbuf, dbuf, clen, dlen);
if (ret != dlen) {
printk(KERN_ERR "%s: failed, ret=%d (expected %u)\n", __func__, ret, (unsigned int)dlen);
return -1;
}
return 0;
}
bool
lbd_ctr(struct lbd* lbd,
struct cbd_params* params,
struct lbatviewcache* lvc)
{
memset(lbd, 0, sizeof(struct lbd));
lbd->lblk = LBLK_NONE;
mutex_init(&lbd->reflock);
lbd->ref = 0;
mutex_init(&lbd->lock);
lbd->state = CACHE_STATE_UNCACHED;
lbd->params = params;
lbd->lvc = lvc;
lbd->lv = NULL;
lbd->lz4_wrkmem = kmalloc(LZ4_compressBound(PBLK_SIZE * lblk_per_pblk(lbd->params)), GFP_KERNEL);
if (!lbd->lz4_wrkmem) {
return false;
}
lbd->lz4_cpages = cbd_alloc_pages(lblk_per_pblk(lbd->params));
if (!lbd->lz4_cpages) {
return false;
}
lbd->lz4_cbuf = page_address(lbd->lz4_cpages);
lbd->pages = cbd_alloc_pages(lblk_per_pblk(lbd->params));
if (!lbd->pages) {
return false;
}
lbd->buf = page_address(lbd->pages);
return true;
}
void
lbd_dtr(struct lbd* lbd)
{
if (lbatviewcache_put(lbd->lvc, lbd->lv) != 0) {
printk(KERN_ERR "%s: lbatviewcache_put failed\n", __func__);
}
lbd->lv = NULL;
cbd_free_pages(lbd->pages, lblk_per_pblk(lbd->params));
lbd->pages = NULL;
lbd->buf = NULL;
cbd_free_pages(lbd->lz4_cpages, lblk_per_pblk(lbd->params));
lbd->lz4_cpages = NULL;
lbd->lz4_cbuf = NULL;
kfree(lbd->lz4_wrkmem);
lbd->lz4_wrkmem = NULL;
}
static void
lbd_flush_endio(struct bio* bio)
{
int ret;
cbd_free_page(bio->bi_io_vec[0].bv_page);
ret = pblk_endio(bio);
if (ret) {
/* XXX: ...? */
printk(KERN_ERR "%s: I/O failed\n", __func__);
}
}
int
lbd_flush(struct lbd* lbd)
{
int ret = 0;
u32 c_len;
u32 elem_len;
u8* p;
u32 n;
u64 pblk;
struct page* iopagev[1];
mutex_lock(&lbd->lock);
if (lbd->state != CACHE_STATE_DIRTY) {
goto out;
}
if (lblk_is_zeros(lbd->params, lbd)) {
c_len = 0;
elem_len = 0;
p = NULL;
}
else {
c_len = lblk_compress(lbd);
if (c_len > 0) {
size_t c_blkrem = c_len % PBLK_SIZE;
if (c_blkrem) {
memset(lbd->lz4_cbuf + c_len, 0, c_blkrem);
}
elem_len = c_len;
p = lbd->lz4_cbuf;
}
else {
c_len = PBLK_SIZE * lblk_per_pblk(lbd->params);
elem_len = CBD_UNCOMPRESSED;
p = lbd->buf;
}
}
ret = lbatview_elem_realloc(lbd->lv, lbd->lblk, elem_len);
if (ret) {
goto out;
}
for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
pblk = lbatview_elem_pblk(lbd->lv, lbd->lblk, n);
iopagev[0] = cbd_alloc_page();
if (!iopagev[0]) {
printk(KERN_ERR "%s: out of memory\n", __func__);
ret = -ENOMEM;
goto out;
}
memcpy(page_address(iopagev[0]), p, PBLK_SIZE);
pblk_write(lbd->params, pblk, 1, iopagev, lbd_flush_endio, lbd);
}
lbd->state = CACHE_STATE_CLEAN;
out:
mutex_unlock(&lbd->lock);
return ret;
}
int
lbd_read(struct lbd* lbd)
{
int ret = 0;
u32 c_len;
u64 pblk;
struct page* iopagev[1];
ret = lbd_flush(lbd);
if (ret) {
return ret;
}
mutex_lock(&lbd->lock);
if (lbd->state == CACHE_STATE_CLEAN) {
goto out;
}
ret = lbatview_read(lbd->lv);
if (ret) {
printk(KERN_ERR "%s: lbat_read failed\n", __func__);
goto out;
}
c_len = lbatview_elem_len(lbd->lv, lbd->lblk);
if (c_len == 0) {
memset(lbd->buf, 0, PBLK_SIZE * lblk_per_pblk(lbd->params));
}
else {
bool is_compressed = true;
u32 d_len = PBLK_SIZE * lblk_per_pblk(lbd->params);
u32 n;
u8* p;
if (c_len == CBD_UNCOMPRESSED) {
is_compressed = false;
c_len = d_len;
}
p = lbd->lz4_cbuf;
for (n = 0; n * PBLK_SIZE < c_len; ++n, p += PBLK_SIZE) {
pblk = lbatview_elem_pblk(lbd->lv, lbd->lblk, n);
if (pblk == 0) {
printk(KERN_ERR "%s: pblk is zero at lblk=%lu n=%u\n", __func__,
(unsigned long)lbd->lblk, n);
ret = -EIO;
goto out;
}
iopagev[0] = virt_to_page(p);
ret = pblk_read_wait(lbd->params, pblk, 1, iopagev);
if (ret) {
goto out;
}
}
if (is_compressed) {
if (lblk_decompress(lbd, c_len) != 0) {
printk(KERN_ERR " decompress failed\n");
ret = -EIO;
goto out;
}
}
else {
memcpy(lbd->buf, lbd->lz4_cbuf, d_len);
}
}
out:
mutex_unlock(&lbd->lock);
return ret;
}
bool
lbd_reset(struct lbd* lbd, u64 lblk)
{
bool ret = true;
BUG_ON(lbd->lblk == lblk);
lbd->lblk = lblk;
lbd->state = CACHE_STATE_UNCACHED;
if (lbatviewcache_put(lbd->lvc, lbd->lv) != 0) {
printk(KERN_ERR "%s: lbatviewcache_put failed\n", __func__);
ret = false;
}
lbd->lv = lbatviewcache_get(lbd->lvc, lblk);
if (!lbd->lv) {
printk(KERN_ERR "%s: lbatviewcache_get failed\n", __func__);
ret = false;
}
return ret;
}
void
lbd_data_read(struct lbd* lbd, u32 off, u32 len, u8* buf)
{
/* XXX: convert to BUG_ON */
if (off + len > PBLK_SIZE * lblk_per_pblk(lbd->params)) {
printk(KERN_ERR "%s: out of bounds\n", __func__);
return;
}
mutex_lock(&lbd->lock);
memcpy(buf, lbd->buf + off, len);
mutex_unlock(&lbd->lock);
}
void
lbd_data_write(struct lbd* lbd, u32 off, u32 len, const u8* buf)
{
/* XXX: convert to BUG_ON */
if (off + len > PBLK_SIZE * lblk_per_pblk(lbd->params)) {
printk(KERN_ERR "%s: out of bounds\n", __func__);
return;
}
mutex_lock(&lbd->lock);
memcpy(lbd->buf + off, buf, len);
lbd->state = CACHE_STATE_DIRTY;
mutex_unlock(&lbd->lock);
}
struct lbdcache
{
struct mutex lock;
struct cbd_params* params;
struct lbatviewcache* lvc;
unsigned int len;
struct lbd** cache;
};
size_t
lbdcache_size(void)
{
return sizeof(struct lbdcache);
}
static bool
lbdcache_realloc(struct lbdcache* lc, unsigned int len)
{
struct lbd** cache;
unsigned int n;
struct lbd* lbd;
cache = kzalloc(len * sizeof(struct lbd*), GFP_KERNEL);
if (!cache) {
return false;
}
n = 0;
if (lc->len) {
memcpy(cache, lc->cache, lc->len * sizeof(struct lbd*));
n = lc->len;
kfree(lc->cache);
}
lc->len = len;
lc->cache = cache;
while (n < len) {
lbd = kmalloc(sizeof(struct lbd), GFP_KERNEL);
if (!lbd) {
return false;
}
cache[n++] = lbd;
if (!lbd_ctr(lbd, lc->params, lc->lvc)) {
return false;
}
}
return true;
}
bool
lbdcache_ctr(struct lbdcache* lc,
struct cbd_params* params)
{
memset(lc, 0, sizeof(struct lbdcache));
mutex_init(&lc->lock);
lc->params = params;
lc->lvc = kzalloc(lbatviewcache_size(), GFP_KERNEL);
if (!lc->lvc) {
return false;
}
if (!lbatviewcache_ctr(lc->lvc, params)) {
return false;
}
return lbdcache_realloc(lc, 1);
}
void
lbdcache_dtr(struct lbdcache* lc)
{
unsigned int n;
struct lbd* lbd;
for (n = 0; n < lc->len; ++n) {
lbd = lc->cache[n];
if (!lbd) {
continue;
}
lbd_dtr(lbd);
if (lbd->ref) {
printk(KERN_ERR "%s: lbd ref leak: n=%u ref=%u\n", __func__, n, lbd->ref);
}
kfree(lbd);
}
kfree(lc->cache);
lc->cache = NULL;
lc->len = 0;
lbatviewcache_dtr(lc->lvc);
kfree(lc->lvc);
lc->lvc = NULL;
lc->params = NULL;
}
struct lbd*
lbdcache_get(struct lbdcache* lc, u64 lblk)
{
unsigned int n;
struct lbd* lbd;
mutex_lock(&lc->lock);
for (n = 0; n < lc->len; ++n) {
lbd = lc->cache[n];
mutex_lock(&lbd->reflock);
if (lbd->lblk == lblk) {
++lbd->ref;
mutex_unlock(&lbd->reflock);
goto out;
}
mutex_unlock(&lbd->reflock);
}
for (n = 0; n < lc->len; ++n) {
lbd = lc->cache[n];
mutex_lock(&lbd->reflock);
if (lbd->lblk == LBLK_NONE) {
goto found;
}
mutex_unlock(&lbd->reflock);
}
for (n = 0; n < lc->len; ++n) {
lbd = lc->cache[n];
mutex_lock(&lbd->reflock);
if (lbd->ref == 0) {
goto found;
}
mutex_unlock(&lbd->reflock);
}
printk(KERN_INFO "%s: all objects in use, realloc...\n", __func__);
n = lc->len;
if (!lbdcache_realloc(lc, lc->len * 2)) {
printk(KERN_ERR "%s: realloc failed\n", __func__);
lbd = NULL;
goto out;
}
lbd = lc->cache[n];
mutex_lock(&lbd->reflock);
found:
if (!lbd_reset(lbd, lblk)) {
mutex_unlock(&lbd->reflock);
printk(KERN_ERR "%s: lbd_reset failed\n", __func__);
lbd = NULL;
goto out;
}
lbd->ref = 1;
mutex_unlock(&lbd->reflock);
out:
mutex_unlock(&lc->lock);
return lbd;
}
int
lbdcache_put(struct lbdcache* lc, struct lbd* lbd)
{
int ret = 0;
if (!lbd) {
return 0;
}
mutex_lock(&lc->lock);
mutex_lock(&lbd->reflock);
if (--lbd->ref == 0) {
ret = lbd_flush(lbd);
if (ret) {
printk(KERN_ERR "%s: lbd_flush failed\n", __func__);
}
}
mutex_unlock(&lbd->reflock);
mutex_unlock(&lc->lock);
return ret;
}

dm-compress/pbat.c (new file)

@@ -0,0 +1,370 @@
/*
* Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>
struct pbat {
u32 zone;
struct mutex reflock;
unsigned int ref;
struct mutex lock;
enum cache_state state;
struct cbd_params* params;
struct page* pages;
u8* buf;
};
bool
pbat_ctr(struct pbat* pbat,
struct cbd_params* params)
{
memset(pbat, 0, sizeof(struct pbat));
pbat->zone = ZONE_NONE;
mutex_init(&pbat->reflock);
pbat->ref = 0;
mutex_init(&pbat->lock);
pbat->state = CACHE_STATE_UNCACHED;
pbat->params = params;
pbat->pages = cbd_alloc_pages(pbat_len(params));
if (!pbat->pages) {
printk(KERN_ERR "%s: Failed to alloc pbat_buf\n", __func__);
return false;
}
pbat->buf = page_address(pbat->pages);
return true;
}
void
pbat_dtr(struct pbat* pbat)
{
pbat->buf = NULL;
cbd_free_pages(pbat->pages, pbat_len(pbat->params));
pbat->pages = NULL;
}
static void
pbat_flush_endio(struct bio* bio)
{
int ret;
unsigned int n;
for (n = 0; n < bio->bi_max_vecs; ++n) {
cbd_free_page(bio->bi_io_vec[n].bv_page);
}
ret = pblk_endio(bio);
if (ret) {
/*
* XXX:
* Set dm_compress.io_error?
* Set pbat.io_error?
* Set pbat.zone = ZONE_ERR?
*/
printk(KERN_ERR "%s: XXX: I/O failed\n", __func__);
}
}
int
pbat_flush(struct pbat* pbat)
{
int ret = 0;
u32 count = pbat_len(pbat->params);
struct page* iopagev[count];
u64 pblk;
u32 n;
u8* iobuf;
mutex_lock(&pbat->lock);
if (pbat->state != CACHE_STATE_DIRTY) {
goto out;
}
pblk = pbat_off(pbat->params, pbat->zone);
if (!cbd_alloc_pagev(iopagev, count)) {
printk(KERN_ERR "%s: out of memory\n", __func__);
ret = -ENOMEM;
goto out;
}
for (n = 0; n < count; ++n) {
iobuf = page_address(iopagev[n]);
memcpy(iobuf, pbat->buf + n * PBLK_SIZE, PBLK_SIZE);
}
pblk_write(pbat->params, pblk, count, iopagev, pbat_flush_endio, pbat);
pbat->state = CACHE_STATE_CLEAN;
out:
mutex_unlock(&pbat->lock);
return ret;
}
int
pbat_read(struct pbat* pbat)
{
int ret = 0;
u32 count = pbat_len(pbat->params);
struct page* pagev[count];
u64 pblk;
u32 n;
ret = pbat_flush(pbat);
if (ret) {
return ret;
}
mutex_lock(&pbat->lock);
if (pbat->state == CACHE_STATE_CLEAN) {
goto out;
}
pblk = pbat_off(pbat->params, pbat->zone);
for (n = 0; n < count; ++n) {
pagev[n] = virt_to_page(pbat->buf + n * PBLK_SIZE);
}
ret = pblk_read_wait(pbat->params, pblk, count, pagev);
if (ret) {
goto out;
}
pbat->state = CACHE_STATE_CLEAN;
out:
mutex_unlock(&pbat->lock);
return ret;
}
void
pbat_reset(struct pbat* pbat, u32 zone)
{
BUG_ON(pbat->zone == zone);
pbat->zone = zone;
pbat->state = CACHE_STATE_UNCACHED;
}
u32
pbat_zone(struct pbat* pbat)
{
return pbat->zone;
}
u64
pbat_alloc(struct pbat* pbat)
{
u32 pblk_count = pbat_len(pbat->params) * PBLK_SIZE_BITS;
u64 idx;
u64 pblk;
mutex_lock(&pbat->lock);
idx = cbd_bitmap_alloc(pbat->buf, pblk_count);
if (idx == pblk_count) {
pblk = PBLK_NONE;
goto out;
}
pbat->state = CACHE_STATE_DIRTY;
pblk = idx + zone_data_off(pbat->params, pbat->zone);
out:
mutex_unlock(&pbat->lock);
return pblk;
}
int
pbat_free(struct pbat* pbat, u64 pblk)
{
u32 zone_pblk_count = pbat_len(pbat->params) * PBLK_SIZE_BITS;
u32 zone;
u32 idx;
BUG_ON(pblk < CBD_HEADER_BLOCKS);
zone = (pblk - CBD_HEADER_BLOCKS) / zone_len(pbat->params);
BUG_ON(zone != pbat->zone);
if (pblk < zone_data_off(pbat->params, zone)) {
printk(KERN_ERR "%s: pblk in metadata\n", __func__);
return -EINVAL;
}
idx = pblk - zone_data_off(pbat->params, zone);
BUG_ON(idx >= zone_pblk_count);
mutex_lock(&pbat->lock);
cbd_bitmap_free(pbat->buf, idx);
pbat->state = CACHE_STATE_DIRTY;
mutex_unlock(&pbat->lock);
return 0;
}
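/*
 * Mapping check (made-up numbers): if zone_data_off(params, 0) == 9,
 * then allocating bit 5 of zone 0's bitmap yields pblk 5 + 9 = 14,
 * and pbat_free(pbat, 14) clears bit 14 - 9 = 5 again.
 */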
struct pbatcache {
struct mutex lock;
struct cbd_params* params;
unsigned int len;
struct pbat** cache;
};
size_t
pbatcache_size(void)
{
return sizeof(struct pbatcache);
}
static bool
pbatcache_realloc(struct pbatcache* pc, unsigned int len)
{
struct pbat** cache;
unsigned int n;
struct pbat* pbat;
cache = kzalloc(len * sizeof(struct pbat*), GFP_KERNEL);
if (!cache) {
return false;
}
n = 0;
if (pc->len) {
memcpy(cache, pc->cache, pc->len * sizeof(struct pbat*));
n = pc->len;
kfree(pc->cache);
}
pc->len = len;
pc->cache = cache;
while (n < len) {
pbat = kmalloc(sizeof(struct pbat), GFP_KERNEL);
if (!pbat) {
return false;
}
cache[n++] = pbat;
if (!pbat_ctr(pbat, pc->params)) {
return false;
}
}
return true;
}
bool
pbatcache_ctr(struct pbatcache* pc,
struct cbd_params* params)
{
memset(pc, 0, sizeof(struct pbatcache));
mutex_init(&pc->lock);
pc->params = params;
return pbatcache_realloc(pc, 1);
}
void
pbatcache_dtr(struct pbatcache* pc)
{
unsigned int n;
struct pbat* pbat;
for (n = 0; n < pc->len; ++n) {
pbat = pc->cache[n];
if (!pbat) {
continue;
}
pbat_dtr(pbat);
if (pbat->ref) {
printk(KERN_ERR "%s: pbat ref leak: n=%u ref=%u\n", __func__, n, pbat->ref);
}
kfree(pbat);
}
kfree(pc->cache);
pc->cache = NULL;
pc->len = 0;
pc->params = NULL;
}
struct pbat*
pbatcache_get(struct pbatcache* pc, u32 zone)
{
unsigned int n;
struct pbat* pbat;
mutex_lock(&pc->lock);
for (n = 0; n < pc->len; ++n) {
pbat = pc->cache[n];
mutex_lock(&pbat->reflock);
if (pbat->zone == zone) {
++pbat->ref;
mutex_unlock(&pbat->reflock);
goto out;
}
mutex_unlock(&pbat->reflock);
}
for (n = 0; n < pc->len; ++n) {
pbat = pc->cache[n];
mutex_lock(&pbat->reflock);
if (pbat->zone == ZONE_NONE) {
goto found;
}
mutex_unlock(&pbat->reflock);
}
for (n = 0; n < pc->len; ++n) {
pbat = pc->cache[n];
mutex_lock(&pbat->reflock);
if (pbat->ref == 0) {
goto found;
}
mutex_unlock(&pbat->reflock);
}
printk(KERN_INFO "%s: all objects in use, realloc...\n", __func__);
n = pc->len;
if (!pbatcache_realloc(pc, pc->len * 2)) {
printk(KERN_ERR "%s: realloc failed\n", __func__);
pbat = NULL;
goto out;
}
pbat = pc->cache[n];
mutex_lock(&pbat->reflock);
found:
pbat_reset(pbat, zone);
pbat->ref = 1;
mutex_unlock(&pbat->reflock);
out:
mutex_unlock(&pc->lock);
return pbat;
}
int
pbatcache_put(struct pbatcache* pc, struct pbat* pbat)
{
int ret = 0;
if (!pbat) {
return 0;
}
mutex_lock(&pc->lock);
mutex_lock(&pbat->reflock);
if (--pbat->ref == 0) {
ret = pbat_flush(pbat);
if (ret) {
printk(KERN_ERR "%s: pbat_flush failed\n", __func__);
}
}
mutex_unlock(&pbat->reflock);
mutex_unlock(&pc->lock);
return ret;
}

dm-compress/util.c (new file)

@@ -0,0 +1,206 @@
/*
* Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>
/**************************************
* Core memory management.
**************************************/
struct page*
cbd_alloc_page(void)
{
return alloc_page(GFP_KERNEL);
}
void
cbd_free_page(struct page* page)
{
__free_page(page);
}
struct page*
cbd_alloc_pages(size_t len)
{
return alloc_pages(GFP_KERNEL, get_order(len * PAGE_SIZE));
}
void
cbd_free_pages(struct page* pages, size_t len)
{
__free_pages(pages, get_order(len * PAGE_SIZE));
}
bool
cbd_alloc_pagev(struct page** pagev, size_t len)
{
size_t n;
for (n = 0; n < len; ++n) {
pagev[n] = cbd_alloc_page();
if (!pagev[n]) {
goto err;
}
}
return true;
err:
while (n--) {
cbd_free_page(pagev[n]);
pagev[n] = NULL;
}
return false;
}
void
cbd_free_pagev(struct page** pagev, size_t len)
{
size_t n;
for (n = 0; n < len; ++n) {
cbd_free_page(pagev[n]);
pagev[n] = NULL;
}
}
/**************************************
* Core low-level I/O.
*
* pblk and count are in units of physical blocks (4096 bytes), NOT sectors.
* data is a page address (obtained via __get_free_pages and friends).
**************************************/
static struct bio*
pblk_io_prepare(struct cbd_params* params, unsigned int op,
u64 pblk, u32 count, struct page** pagev)
{
struct bio* bio;
u32 n;
bio = bio_alloc(GFP_KERNEL, count);
if (!bio) {
printk(KERN_ERR "%s: out of memory\n", __func__);
return NULL;
}
bio_set_dev(bio, (struct block_device*)params->priv);
bio->bi_opf = op;
bio->bi_iter.bi_sector = (pblk << (PBLK_SHIFT - SECTOR_SHIFT));
for (n = 0; n < count; ++n) {
if (bio_add_page(bio, pagev[n], PAGE_SIZE, 0) != PAGE_SIZE) {
BUG();
}
}
return bio;
}
int
pblk_read_wait(struct cbd_params* params,
u64 pblk, u32 count, struct page** pagev)
{
int ret;
struct bio* bio;
bio = pblk_io_prepare(params, REQ_OP_READ, pblk, count, pagev);
if (!bio) {
printk(KERN_ERR "%s: out of memory\n", __func__);
return -ENOMEM;
}
ret = submit_bio_wait(bio);
if (ret != 0) {
printk(KERN_ERR "%s: submit_bio_wait failed: %d\n", __func__, ret);
}
bio_put(bio);
return ret;
}
int
pblk_read(struct cbd_params* params,
u64 pblk, u32 count, struct page** pagev,
pblk_endio_t endio, void* endio_priv)
{
struct bio* bio;
bio = pblk_io_prepare(params, REQ_OP_READ, pblk, count, pagev);
if (!bio) {
printk(KERN_ERR "%s: out of memory\n", __func__);
return -ENOMEM;
}
bio->bi_end_io = endio;
bio->bi_private = endio_priv;
/* The bio will be released by the completion callback via pblk_endio(). */
submit_bio(bio);
return 0;
}
void
pblk_write(struct cbd_params* params,
u64 pblk, u32 count, struct page** pagev,
pblk_endio_t endio, void* endio_priv)
{
struct bio* bio;
bio = pblk_io_prepare(params, REQ_OP_WRITE, pblk, count, pagev);
if (!bio) {
printk(KERN_ERR "%s: out of memory\n", __func__);
return;
}
bio->bi_end_io = endio;
bio->bi_private = endio_priv;
if (pblk < CBD_HEADER_BLOCKS) {
printk(KERN_ERR "%s: *** Attempt to write header\n", __func__);
dump_stack();
bio->bi_status = BLK_STS_IOERR;
endio(bio);
return;
}
submit_bio(bio);
}
int
pblk_endio(struct bio* bio)
{
int ret;
ret = blk_status_to_errno(bio->bi_status);
bio_put(bio);
return ret;
}
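/*
 * A sketch of chaining an async pblk_read() to pblk_endio().  The
 * struct my_ctx, my_read_endio() and my_read() names are hypothetical,
 * and <linux/completion.h> is assumed to be available.
 */
struct my_ctx {
	struct completion done;
	int error;
};

static void
my_read_endio(struct bio* bio)
{
	struct my_ctx* ctx = bio->bi_private;

	/* pblk_endio() maps bi_status to an errno and drops the bio ref. */
	ctx->error = pblk_endio(bio);
	complete(&ctx->done);
}

static int
my_read(struct cbd_params* params, u64 pblk, u32 count,
        struct page** pagev, struct my_ctx* ctx)
{
	init_completion(&ctx->done);
	return pblk_read(params, pblk, count, pagev, my_read_endio, ctx);
}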

include/linux/dm-compress.h

View File

@@ -2,19 +2,19 @@
#define _LINUX_DM_COMPRESS_H
#ifndef SECTOR_SHIFT
#define SECTOR_SHIFT 9
#define SECTOR_SHIFT 9
#endif
#ifndef SECTOR_SIZE
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
#endif
#define PBLK_SHIFT 12
#define PBLK_SIZE (1 << PBLK_SHIFT)
#define PBLK_SIZE_BITS (PBLK_SIZE * BITS_PER_BYTE)
#define PBLK_SHIFT 12
#define PBLK_SIZE (1 << PBLK_SHIFT)
#define PBLK_SIZE_BITS (PBLK_SIZE * BITS_PER_BYTE)
#define PBLK_PER_SECTOR (1 << (PBLK_SHIFT - SECTOR_SHIFT))
#define LBLK_SHIFT_MIN 1
#define LBLK_SHIFT_MAX (20 - PBLK_SHIFT)
#define LBLK_SHIFT_MIN 1
#define LBLK_SHIFT_MAX (20 - PBLK_SHIFT)
#define CBD_HEADER_BLOCKS 1
@@ -33,12 +33,13 @@ enum cbd_alg {
};
struct cbd_params {
u8 algorithm; /* cbd_alg */
u8 algorithm; /* enum cbd_alg */
u8 compression; /* 0..9 */
u16 lblk_shift;
u64 nr_pblk;
u32 nr_zones;
u32 lblk_per_zone;
void* priv;
};
struct cbd_header {
@@ -48,10 +49,10 @@ struct cbd_header {
struct cbd_params params;
};
struct lbat_elem
struct lba
{
u32 len; /* Compressed length */
u64 pblk[1]; /* Vector of physical blocks */
u32 len; /* Compressed length */
u64 pblk[1]; /* Vector of physical blocks */
};
static inline void
@@ -151,6 +152,36 @@ put64_le(u8** raw, u64 val)
*raw += sizeof(leval);
}
/* XXX: Use kernel bit functions */
static inline u32
cbd_bitmap_alloc(u8* buf, u32 bitsize)
{
u32 off = 0;
u32 bit = 0;
for (off = 0; off < bitsize / BITS_PER_BYTE; ++off) {
if (buf[off] != 0xff) {
while (buf[off] & (1 << bit)) {
++bit;
}
buf[off] |= (1 << bit);
break;
}
}
return off * BITS_PER_BYTE + bit;
}
/* XXX: Use kernel bit functions */
static inline void
cbd_bitmap_free(u8* buf, u32 idx)
{
u32 off = idx / BITS_PER_BYTE;
u32 bit = idx % BITS_PER_BYTE;
buf[off] &= ~(1 << bit);
}
static inline u32
@@ -166,7 +197,7 @@ pbat_len(const struct cbd_params* params)
}
static inline u32
lbat_elem_len(const struct cbd_params* params)
lba_len(const struct cbd_params* params)
{
u32 elem_len_bytes = (params->lblk_shift + PBLK_SHIFT > 16) ? 4 : 2;
u32 elem_pblk_bytes = (params->nr_pblk <= 0xffff ? 2 :
@@ -177,7 +208,7 @@ lbat_elem_len(const struct cbd_params* params)
static inline u32
lbat_len(const struct cbd_params* params)
{
return DIV_ROUND_UP(params->lblk_per_zone * lbat_elem_len(params), PBLK_SIZE);
return DIV_ROUND_UP(params->lblk_per_zone * lba_len(params), PBLK_SIZE);
}
static inline u32
@@ -255,103 +286,242 @@ cbd_header_put(u8* buf, const struct cbd_header* header)
put32_le(&buf, header->params.lblk_per_zone);
}
/*
* XXX:
* nr_bits = pbat_len(params) * PBLK_SIZE;
* bit = find_next_zero_bit_le(buf, nr_bits);
* if (bit < nr_bits) {
* set_bit_le(bit, buf);
* }
* return bit;
*/
static inline u32
cbd_bitmap_alloc(u8* buf, u32 bitsize)
lba_len_get(const struct cbd_params* params, const u8* buf)
{
u32 off = 0;
u32 bit = 0;
for (off = 0; off < bitsize / BITS_PER_BYTE; ++off) {
if (buf[off] != 0xff) {
while (buf[off] & (1 << bit)) {
++bit;
}
buf[off] |= (1 << bit);
break;
}
if (params->lblk_shift + PBLK_SHIFT > 16) {
return get32_le(&buf);
}
else {
return get16_le(&buf);
}
return off * BITS_PER_BYTE + bit;
}
/*
* XXX:
* clear_bit_le(bit, buf);
*/
static inline void
cbd_bitmap_free(u8* buf, u32 idx)
lba_len_put(const struct cbd_params* params, u8* buf, u32 val)
{
u32 off = idx / BITS_PER_BYTE;
u32 bit = idx % BITS_PER_BYTE;
if (params->lblk_shift + PBLK_SHIFT > 16) {
put32_le(&buf, val);
}
else {
put16_le(&buf, val);
}
}
buf[off] &= ~(1 << bit);
static inline u64
lba_pblk_get(const struct cbd_params* params, const u8* buf, u32 idx)
{
const u8* p = buf;
p += (params->lblk_shift + PBLK_SHIFT > 16) ? 4 : 2;
if (params->nr_pblk <= 0xffff) {
p += 2 * idx;
return get16_le(&p);
}
else if (params->nr_pblk <= 0xffffffff) {
p += 4 * idx;
return get32_le(&p);
}
else {
p += 6 * idx;
return get48_le(&p);
}
}
static inline void
lbat_elem_get(const struct cbd_params* params,
const u8* buf, struct lbat_elem* elem)
lba_pblk_put(const struct cbd_params* params, u8* buf, u32 idx, u64 val)
{
u8* p = buf;
p += (params->lblk_shift + PBLK_SHIFT > 16) ? 4 : 2;
if (params->nr_pblk <= 0xffff) {
p += 2 * idx;
put16_le(&p, val);
}
else if (params->nr_pblk <= 0xffffffff) {
p += 4 * idx;
put32_le(&p, val);
}
else {
p += 6 * idx;
put48_le(&p, val);
}
}
static inline void
lba_get(const struct cbd_params* params,
const u8* buf, struct lba* lba)
{
u32 n;
if (params->lblk_shift + PBLK_SHIFT > 16) {
elem->len = get32_le(&buf);
lba->len = get32_le(&buf);
}
else {
elem->len = get16_le(&buf);
lba->len = get16_le(&buf);
}
if (params->nr_pblk <= 0xffff) {
for (n = 0; n < lblk_per_pblk(params); ++n) {
elem->pblk[n] = get16_le(&buf);
lba->pblk[n] = get16_le(&buf);
}
}
else if (params->nr_pblk <= 0xffffffff) {
for (n = 0; n < lblk_per_pblk(params); ++n) {
elem->pblk[n] = get32_le(&buf);
lba->pblk[n] = get32_le(&buf);
}
}
else {
for (n = 0; n < lblk_per_pblk(params); ++n) {
elem->pblk[n] = get48_le(&buf);
lba->pblk[n] = get48_le(&buf);
}
}
}
static inline void
lbat_elem_put(const struct cbd_params* params,
u8* buf, const struct lbat_elem* elem)
lba_put(const struct cbd_params* params,
u8* buf, const struct lba* lba)
{
u32 n;
if (params->lblk_shift + PBLK_SHIFT > 16) {
put32_le(&buf, elem->len);
put32_le(&buf, lba->len);
}
else {
put16_le(&buf, elem->len);
put16_le(&buf, lba->len);
}
if (params->nr_pblk <= 0xffff) {
for (n = 0; n < lblk_per_pblk(params); ++n) {
put16_le(&buf, elem->pblk[n]);
put16_le(&buf, lba->pblk[n]);
}
}
else if (params->nr_pblk <= 0xffffffff) {
for (n = 0; n < lblk_per_pblk(params); ++n) {
put32_le(&buf, elem->pblk[n]);
put32_le(&buf, lba->pblk[n]);
}
}
else {
for (n = 0; n < lblk_per_pblk(params); ++n) {
put48_le(&buf, elem->pblk[n]);
put48_le(&buf, lba->pblk[n]);
}
}
}
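/*
 * A worked size example for the variable-width encoding above,
 * assuming lblk_per_pblk(params) == 1u << lblk_shift.  With
 * lblk_shift == 4, one logical block spans 16 pblks (64 KiB); the len
 * field takes 2 bytes because 4 + PBLK_SHIFT == 16 is not > 16, and
 * with nr_pblk <= 0xffffffff each pblk entry takes 4 bytes, so one
 * on-disk entry occupies lba_len() == 2 + 16 * 4 == 66 bytes.
 */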
#ifdef __KERNEL__
#define ZONE_NONE (u32)(~0)
#define PBLK_NONE (u64)(~0)
#define LBLK_NONE (u64)(~0)
enum cache_state {
CACHE_STATE_UNCACHED,
CACHE_STATE_CLEAN,
CACHE_STATE_DIRTY,
CACHE_STATE_MAX
};
typedef void (*pblk_endio_t)(struct bio*);
/* Single page allocator */
struct page*
cbd_alloc_page(void);
void cbd_free_page(struct page* page);
/* Multiple page allocator */
struct page*
cbd_alloc_pages(size_t len);
void cbd_free_pages(struct page* pages, size_t len);
/* Vector page allocator */
bool cbd_alloc_pagev(struct page** pagev, size_t len);
void cbd_free_pagev(struct page** pagev, size_t len);
int pblk_read_wait(struct cbd_params* params,
u64 pblk, u32 count, struct page** pagev);
int pblk_read(struct cbd_params* params,
u64 pblk, u32 count, struct page** pagev,
pblk_endio_t endio, void* endio_priv);
void pblk_write(struct cbd_params* params,
u64 pblk, u32 count, struct page** pagev,
pblk_endio_t endio, void* endio_priv);
int pblk_endio(struct bio* bio);
struct pbat;
bool pbat_ctr(struct pbat* pbat,
struct cbd_params* params);
void pbat_dtr(struct pbat* pbat);
int pbat_flush(struct pbat* pbat);
int pbat_read(struct pbat* pbat);
void pbat_reset(struct pbat* pbat, u32 zone);
u32 pbat_zone(struct pbat* pbat);
u64 pbat_alloc(struct pbat* pbat);
int pbat_free(struct pbat* pbat, u64 pblk);
struct pbatcache;
size_t pbatcache_size(void);
bool pbatcache_ctr(struct pbatcache* pbatcache,
struct cbd_params* params);
void pbatcache_dtr(struct pbatcache* pbatcache);
struct pbat*
pbatcache_get(struct pbatcache* pbatcache, u32 zone);
int pbatcache_put(struct pbatcache* pbatcache, struct pbat* pbat);
struct lbatpage;
bool lbatpage_ctr(struct lbatpage* lp, struct cbd_params* params);
void lbatpage_dtr(struct lbatpage* lp);
int lbatpage_flush(struct lbatpage* lp);
int lbatpage_read(struct lbatpage* lp);
void lbatpage_reset(struct lbatpage* lp, u64 pblk);
u8* lbatpage_get_buf(struct lbatpage* lp, bool rw);
void lbatpage_put_buf(struct lbatpage* lp);
struct lbatpagecache;
size_t lbatpagecache_size(void);
bool lbatpagecache_ctr(struct lbatpagecache* lpc,
struct cbd_params* params);
void lbatpagecache_dtr(struct lbatpagecache* lpc);
struct lbatpage*
lbatpagecache_get(struct lbatpagecache* lpc, u64 pblk);
int lbatpagecache_put(struct lbatpagecache* lpc, struct lbatpage* lpi);
struct lbatview;
bool lbatview_ctr(struct lbatview* lv,
struct cbd_params* params,
struct pbatcache* pbatcache,
struct lbatpagecache* lpc);
void lbatview_dtr(struct lbatview* lv);
int lbatview_flush(struct lbatview* lv);
int lbatview_read(struct lbatview* lv);
bool lbatview_reset(struct lbatview* lv, u64 pblk, u32 count);
int lbatview_elem_realloc(struct lbatview* lv, u64 lblk, u32 len);
u32 lbatview_elem_len(struct lbatview* lv, u64 lblk);
u64 lbatview_elem_pblk(struct lbatview* lv, u64 lblk, u32 idx);
struct lbatviewcache;
size_t lbatviewcache_size(void);
bool lbatviewcache_ctr(struct lbatviewcache* lvc,
struct cbd_params* params);
void lbatviewcache_dtr(struct lbatviewcache* lvc);
struct lbatview*
lbatviewcache_get(struct lbatviewcache* lvc, u64 lblk);
int lbatviewcache_put(struct lbatviewcache* lvc, struct lbatview* lbv);
struct lbd;
bool lbd_ctr(struct lbd* lbd,
struct cbd_params* params,
struct lbatviewcache* lvc);
void lbd_dtr(struct lbd* lbd);
int lbd_flush(struct lbd* lbd);
int lbd_read(struct lbd* lbd);
bool lbd_reset(struct lbd* lbd, u64 lblk);
void lbd_data_read(struct lbd* lbd, u32 off, u32 len, u8* buf);
void lbd_data_write(struct lbd* lbd, u32 off, u32 len, const u8* buf);
struct lbdcache;
size_t lbdcache_size(void);
bool lbdcache_ctr(struct lbdcache* lc,
struct cbd_params* params);
void lbdcache_dtr(struct lbdcache* lc);
struct lbd*
lbdcache_get(struct lbdcache* lc, u64 lblk);
int lbdcache_put(struct lbdcache* lc, struct lbd* lbd);
#endif
#endif /* _LINUX_DM_COMPRESS_H */
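The declarations above all follow the same ctr/dtr/get/read/flush/put
shape.  The sketch below shows how a top-level read might thread through
the lbd layer; my_compress_read() is a hypothetical helper, and it
assumes lbdcache_get() only takes a reference (so lbd_read() must still
be called) and that lbdcache_put() returns a flush status:

static int
my_compress_read(struct lbdcache* lc, u64 lblk, u32 off, u32 len, u8* buf)
{
	struct lbd* lbd;
	int ret;

	lbd = lbdcache_get(lc, lblk);		/* take a cache reference */
	if (!lbd)
		return -ENOMEM;
	ret = lbd_read(lbd);			/* make the logical block resident */
	if (ret) {
		lbdcache_put(lc, lbd);
		return ret;
	}
	lbd_data_read(lbd, off, len, buf);	/* copy out of the cached lblk */
	return lbdcache_put(lc, lbd);		/* drop the ref; may flush */
}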

View File

@@ -63,45 +63,69 @@ check_one_lblk(const struct cbd_params* params,
const struct zone_metadata* zm,
u8** pblk_used)
{
struct lbat_elem* elem;
u8* elem_buf;
struct lba* lba;
u8* lba_buf;
u32 c_len;
u32 n;
u64 pblk;
u32 rel_pblk;
elem = calloc(1, offsetof(struct lbat_elem, pblk[lblk_per_pblk(params)]));
elem_buf = zm->lbat + lblk * lbat_elem_len(params);
lbat_elem_get(params, elem_buf, elem);
printf(" lblk[%u]: len=%u\n", lblk, elem->len);
lba = calloc(1, offsetof(struct lba, pblk[lblk_per_pblk(params)]));
lba_buf = zm->lbat + lblk * lba_len(params);
lba_get(params, lba_buf, lba);
if (lba->len) {
if (lba->len == CBD_UNCOMPRESSED) {
printf(" lblk[%u]: UNCOMPRESSED\n", lblk);
}
else {
printf(" lblk[%u]: len=%u\n", lblk, lba->len);
}
}
if (lba->len > PBLK_SIZE * lblk_per_pblk(params)) {
printf(" Length out of bounds\n");
return;
}
c_len = (lba->len == CBD_UNCOMPRESSED) ? PBLK_SIZE * lblk_per_pblk(params) : lba->len;
for (n = 0; n < lblk_per_pblk(params); ++n) {
pblk = elem->pblk[n];
if (elem->len > PBLK_SIZE * n) {
/* XXX: allow out-of-zone allocs for v1.1 */
if (pblk < zone_data_off(params, zone) || pblk >= zone_off(params, zone + 1)) {
printf("Alloc out of bounds for zone %u block %u index %u: %lu\n",
(unsigned int)zone, lblk, n,
(unsigned long)pblk);
pblk = lba->pblk[n];
if (c_len > PBLK_SIZE * n) {
u32 pblk_zone;
u32 rel_pblk;
if (pblk < CBD_HEADER_BLOCKS) {
printf(" [%u] :E: Alloc in header: %lu\n", n, pblk);
continue;
}
rel_pblk = pblk - zone_data_off(params, zone);
printf(" [%u] pblk=%lu rel_pblk=%u\n", n, (unsigned long)pblk, rel_pblk);
if (pblk_used[zone][rel_pblk/8] & (1 << (rel_pblk % 8))) {
printf("Duplicate allocation for zone %u block %u\n",
(unsigned int)zone, (unsigned int)rel_pblk);
pblk_zone = (pblk - CBD_HEADER_BLOCKS) / zone_len(params);
if (pblk_zone >= params->nr_zones) {
printf(" [%u] :E: Alloc beyond end: %lu\n", n, pblk);
continue;
}
pblk_used[zone][rel_pblk/8] |= (1 << (rel_pblk % 8));
if (pblk < zone_data_off(params, pblk_zone)) {
printf(" [%u] :E: Alloc in metadata: %lu\n", n, pblk);
continue;
}
rel_pblk = pblk - zone_data_off(params, pblk_zone);
/* XXX: Cannot happen? */
if (rel_pblk >= pbat_len(params) * PBLK_SIZE_BITS) {
printf(" [%u] :E: Alloc out of zone: %lu\n", n, pblk);
continue;
}
printf(" [%u] pblk=%lu pblk_zone=%u rel_pblk=%u\n", n,
(unsigned long)pblk, pblk_zone, rel_pblk);
if (pblk_used[pblk_zone][rel_pblk/8] & (1 << (rel_pblk % 8))) {
printf(" [%u] :E: Duplicate allocation for zone %u block %u\n",
n, pblk_zone, rel_pblk);
continue;
}
pblk_used[pblk_zone][rel_pblk/8] |= (1 << (rel_pblk % 8));
}
else {
if (pblk) {
printf("Unexpected pblk alloc for zone %u block %u index %u: %lu\n",
(unsigned int)zone, lblk, n,
(unsigned long)pblk);
printf(" [%u] :E: Unexpected pblk alloc: %lu\n", n, pblk);
}
}
}
free(elem);
free(lba);
}
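/*
 * Hypothetical numbers to make the recovery arithmetic above concrete:
 * with CBD_HEADER_BLOCKS == 1 and zone_len(params) == 1024, a data
 * block at pblk 2050 yields pblk_zone = (2050 - 1) / 1024 = 2, and
 * rel_pblk is its offset past that zone's metadata,
 * pblk - zone_data_off(params, 2).
 */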
static void
@@ -110,12 +134,29 @@ check_one_zone(const struct cbd_params* params,
const struct zone_metadata* zm,
u8** pblk_used)
{
u32 lblk_alloc_len;
u32 n;
bool zone_empty;
u32 lblk;
printf("Zone %u: alloc [%lu .. %lu]\n",
printf("Zone %u: lbat=[%lu..%lu] alloc=[%lu .. %lu]\n",
(unsigned int)zone,
(unsigned long)zone_off(params, zone),
(unsigned long)(zone_data_off(params, zone) - 1),
(unsigned long)zone_data_off(params, zone),
(unsigned long)zone_off(params, zone + 1));
zone_empty = true;
lblk_alloc_len = params->lblk_per_zone * lba_len(params);
for (n = 0; n < lblk_alloc_len; ++n) {
if (zm->lbat[n]) {
zone_empty = false;
break;
}
}
if (zone_empty) {
printf(" [empty]\n");
return;
}
for (lblk = 0; lblk < params->lblk_per_zone; ++lblk) {
check_one_lblk(params, zone, lblk, zm, pblk_used);
}
@@ -130,7 +171,7 @@ check_zone_metadata(const struct cbd_params* params,
pblk_used = calloc(params->nr_zones, sizeof(void*));
for (zone = 0; zone < params->nr_zones; ++zone) {
pblk_used[zone] = calloc(1, pbat_len(params));
pblk_used[zone] = calloc(pbat_len(params), PBLK_SIZE);
}
for (zone = 0; zone < params->nr_zones; ++zone) {
@@ -164,7 +205,7 @@ cbd_check(const char* dev,
zmvec = calloc(header.params.nr_zones, sizeof(struct zone_metadata));
for (zone = 0; zone < header.params.nr_zones; ++zone) {
zmvec[zone].pbat = calloc(1, PBLK_SIZE);
zmvec[zone].pbat = calloc(pbat_len(&header.params), PBLK_SIZE);
pblk_read(devfd,
pbat_off(&header.params, zone),
pbat_len(&header.params),