cbd/dm-compress/pbat.c


/*
 * Copyright (c) 2019 Tom Marshall <tdm.code@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/device-mapper.h>
#include <linux/workqueue.h>
#include <linux/mutex.h>
#include <linux/lz4.h>
#include <linux/dm-compress.h>

#define CPU_NONE (~0)

struct iostate {
    int cpu;
    byte hash[MD5_DIGEST_SIZE];
};

struct pbat {
    u32 zone;
    struct mutex reflock;
    unsigned int ref;
    struct mutex lock;
    enum cache_state state;
    struct cbd_params* params;
    struct page* pages;
    u8* buf;

    struct iostate lrs;     /* last read state */
    struct iostate lws;     /* last written state */
};
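
/*
 * The iostate fields are a debugging aid: pbat_save_state() records an
 * MD5 digest of the buffer and the CPU that wrote it, and
 * pbat_check_state() verifies freshly read data against those digests
 * to catch stale or corrupt reads.
 */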
static void
pbat_save_state(struct pbat* pbat)
{
    struct md5 ctx;

    md5_init(&ctx);
    md5_update(&ctx, pbat->buf, PBLK_SIZE);
    md5_final(&ctx, pbat->lws.hash);
    pbat->lws.cpu = smp_processor_id();
}
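
/*
 * Verify the buffer contents after a read.  A digest matching the last
 * write is good data; one matching the last read is reported as stale;
 * any other digest is reported as bogus.  The first call simply records
 * the current state and succeeds.
 */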
static bool
pbat_check_state(struct pbat* pbat)
{
    struct md5 ctx;
    byte hash[MD5_DIGEST_SIZE];

    if (pbat->lws.cpu == CPU_NONE) {
        pbat_save_state(pbat);
        memcpy(&pbat->lrs, &pbat->lws, sizeof(struct iostate));
        return true;
    }
    md5_init(&ctx);
    md5_update(&ctx, pbat->buf, PBLK_SIZE);
    md5_final(&ctx, hash);
    if (!memcmp(hash, pbat->lws.hash, MD5_DIGEST_SIZE)) {
        return true;
    }
    if (!memcmp(hash, pbat->lrs.hash, MD5_DIGEST_SIZE)) {
        printk(KERN_ERR "%s: read stale data: wcpu=%d curcpu=%d\n", __func__,
               pbat->lws.cpu, smp_processor_id());
        return false;
    }
    printk(KERN_ERR "%s: read bogus data: wcpu=%d curcpu=%d\n", __func__,
           pbat->lws.cpu, smp_processor_id());
    return false;
}
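
/*
 * One-time construction of a cache entry: initialize the locks, mark
 * the entry unassociated (ZONE_NONE, uncached), and allocate pages for
 * the in-memory copy of the zone's physical block allocation bitmap.
 */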
static bool
pbat_ctr(struct pbat* pbat,
         struct cbd_params* params)
{
    memset(pbat, 0, sizeof(struct pbat));
    pbat->zone = ZONE_NONE;
    mutex_init(&pbat->reflock);
    pbat->ref = 0;
    mutex_init(&pbat->lock);
    pbat->state = CACHE_STATE_UNCACHED;
    pbat->params = params;
    pbat->pages = cbd_alloc_pages(pbat_len(params));
    if (!pbat->pages) {
        printk(KERN_ERR "%s: Failed to alloc pbat_buf\n", __func__);
        return false;
    }
    pbat->buf = page_address(pbat->pages);

    pbat->lrs.cpu = CPU_NONE;
    pbat->lws.cpu = CPU_NONE;

    return true;
}

static void
pbat_dtr(struct pbat* pbat)
{
    pbat->buf = NULL;
    cbd_free_pages(pbat->pages, pbat_len(pbat->params));
    pbat->pages = NULL;
}
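
/*
 * Write completion for pbat_flush(): release the bounce pages attached
 * to the bio and mark the cache entry errored if the I/O failed.
 */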
static void
pbat_flush_endio(struct bio* bio)
{
    struct pbat* pbat = bio->bi_private;
    int ret;
    unsigned int n;

    /* Free each bounce page attached by pbat_flush(). */
    for (n = 0; n < bio->bi_max_vecs; ++n) {
        cbd_free_page(bio->bi_io_vec[n].bv_page);
    }
    ret = pblk_endio(bio);
    if (ret) {
        printk(KERN_ERR "%s: I/O failed\n", __func__);
        pbat->state = CACHE_STATE_ERROR;
    }
}
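
/*
 * Write a dirty bitmap back to disk.  The buffer is copied into
 * freshly allocated bounce pages so the cached copy may change while
 * the write is in flight; pbat_flush_endio() frees those pages.
 */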
static int
pbat_flush(struct pbat* pbat)
{
    int ret = 0;
    u32 count = pbat_len(pbat->params);
    struct page* iopagev[count];
    u64 pblk;
    u32 n;
    u8* iobuf;

    mutex_lock(&pbat->lock);
    if (pbat->state != CACHE_STATE_DIRTY) {
        if (pbat->state == CACHE_STATE_ERROR) {
            ret = -EIO;
        }
        goto out;
    }
    pblk = pbat_off(pbat->params, pbat->zone);
    if (!cbd_alloc_pagev(iopagev, count)) {
        printk(KERN_ERR "%s: out of memory\n", __func__);
        ret = -ENOMEM;
        goto out;
    }
    for (n = 0; n < count; ++n) {
        iobuf = page_address(iopagev[n]);
        memcpy(iobuf, pbat->buf + n * PBLK_SIZE, PBLK_SIZE);
    }

    pbat_save_state(pbat);
    pblk_write(pbat->params, pblk, count, iopagev, pbat_flush_endio, pbat);
    pbat->state = CACHE_STATE_CLEAN;

out:
    mutex_unlock(&pbat->lock);
    return ret;
}

static void
pbat_reset(struct pbat* pbat, u32 zone)
{
    if (pbat->zone != zone) {
        pbat->zone = zone;
        pbat->state = CACHE_STATE_UNCACHED;
    }
}
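
/*
 * Read the bitmap for the current zone into the cache buffer.  The
 * data is verified with pbat_check_state() and the read is retried
 * (after yielding) until it matches the last known write.
 */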
int
pbat_read(struct pbat* pbat)
{
    int ret = 0;
    u32 count = pbat_len(pbat->params);
    struct page* pagev[count];
    u64 pblk;
    u32 n;
    bool retried = false;

    mutex_lock(&pbat->lock);
    if (pbat->state != CACHE_STATE_UNCACHED) {
        goto out;
    }
    pblk = pbat_off(pbat->params, pbat->zone);
    for (n = 0; n < count; ++n) {
        pagev[n] = virt_to_page(pbat->buf + n * PBLK_SIZE);
    }

again:
    ret = pblk_read_wait(pbat->params, pblk, count, pagev);
    if (ret) {
        goto out;
    }

    if (!pbat_check_state(pbat)) {
        printk(KERN_ERR "%s: check state failed, retrying\n", __func__);
        yield();
        retried = true;
        goto again;
    }
    if (retried) {
        printk(KERN_ERR "%s: read good data after retry\n", __func__);
    }
    pbat->state = CACHE_STATE_CLEAN;

out:
    mutex_unlock(&pbat->lock);
    return ret;
}

u32
pbat_zone(struct pbat* pbat)
{
    return pbat->zone;
}
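
/*
 * Allocate one physical block from this zone: take a free bit from the
 * bitmap and translate its index to an absolute pblk.  Returns
 * PBLK_NONE when the zone is full.
 */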
u64
pbat_alloc(struct pbat* pbat)
{
    u32 pblk_count = pbat_len(pbat->params) * PBLK_SIZE_BITS;
    u32 idx;
    u64 pblk;

    mutex_lock(&pbat->lock);
    BUG_ON(pbat->state == CACHE_STATE_UNCACHED);
    idx = cbd_bitmap_alloc(pbat->buf, pblk_count);
    if (idx == pblk_count) {
        pblk = PBLK_NONE;
        goto out;
    }
    pblk = idx + zone_data_off(pbat->params, pbat->zone);
    pbat->state = CACHE_STATE_DIRTY;

out:
    mutex_unlock(&pbat->lock);
    return pblk;
}
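
/*
 * Return a physical block to this zone's bitmap.  The pblk must belong
 * to the pbat's zone and must lie in the zone's data area rather than
 * its metadata.
 */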
int
pbat_free(struct pbat* pbat, u64 pblk)
{
    u32 zone_pblk_count = pbat_len(pbat->params) * PBLK_SIZE_BITS;
    u32 zone;
    u32 idx;

    zone = zone_for_pblk(pbat->params, pblk);
    BUG_ON(zone != pbat->zone);
    if (pblk < zone_data_off(pbat->params, zone)) {
        printk(KERN_ERR "%s: pblk in metadata\n", __func__);
        return -EINVAL;
    }
    idx = pblk - zone_data_off(pbat->params, zone);
    BUG_ON(idx >= zone_pblk_count);
    mutex_lock(&pbat->lock);
    BUG_ON(pbat->state == CACHE_STATE_UNCACHED);
    cbd_bitmap_free(pbat->buf, idx);
    pbat->state = CACHE_STATE_DIRTY;
    mutex_unlock(&pbat->lock);

    return 0;
}
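
/*
 * The pbatcache owns every pbat entry plus per-zone iostate vectors,
 * so a zone's digests survive even while its cache entry is recycled
 * for another zone.
 */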
struct pbatcache {
    struct mutex lock;
    struct cbd_params* params;
    unsigned int len;
    struct pbat** cache;

    struct iostate* lrsv;   /* per-zone last read state */
    struct iostate* lwsv;   /* per-zone last written state */
};

size_t
pbatcache_size(void)
{
    return sizeof(struct pbatcache);
}
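
/*
 * Grow the cache to @len entries: allocate a new pointer array, carry
 * over the old entries, and construct the new ones.  Each new entry is
 * stored in the array before construction so pbatcache_dtr() can still
 * free it if construction fails midway.
 */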
static bool
pbatcache_realloc(struct pbatcache* pc, unsigned int len)
{
    struct pbat** cache;
    unsigned int n;
    struct pbat* pbat;

    cache = kzalloc(len * sizeof(struct pbat*), GFP_KERNEL);
    if (!cache) {
        return false;
    }
    n = 0;
    if (pc->len) {
        memcpy(cache, pc->cache, pc->len * sizeof(struct pbat*));
        n = pc->len;
        kfree(pc->cache);
    }
    pc->len = len;
    pc->cache = cache;
    while (n < len) {
        pbat = kmalloc(sizeof(struct pbat), GFP_KERNEL);
        if (!pbat) {
            return false;
        }
        cache[n++] = pbat;
        if (!pbat_ctr(pbat, pc->params)) {
            return false;
        }
    }

    return true;
}
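
/*
 * Initialize the cache: allocate the per-zone iostate vectors and
 * start with a single cache entry.
 */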
bool
pbatcache_ctr(struct pbatcache* pc,
              struct cbd_params* params)
{
    unsigned int idx;

    memset(pc, 0, sizeof(struct pbatcache));
    mutex_init(&pc->lock);
    pc->params = params;

    pc->lrsv = kzalloc(params->nr_zones * sizeof(struct iostate), GFP_KERNEL);
    pc->lwsv = kzalloc(params->nr_zones * sizeof(struct iostate), GFP_KERNEL);
    if (!pc->lrsv || !pc->lwsv) {
        return false;
    }
    for (idx = 0; idx < params->nr_zones; ++idx) {
        pc->lrsv[idx].cpu = CPU_NONE;
        pc->lwsv[idx].cpu = CPU_NONE;
    }

    return pbatcache_realloc(pc, 1);
}

void
pbatcache_dtr(struct pbatcache* pc)
{
    unsigned int n;
    struct pbat* pbat;

    kfree(pc->lwsv);
    kfree(pc->lrsv);
    for (n = 0; n < pc->len; ++n) {
        pbat = pc->cache[n];
        if (!pbat) {
            continue;
        }
        pbat_dtr(pbat);
        if (pbat->ref) {
            printk(KERN_ERR "%s: pbat ref leak: n=%u ref=%u\n", __func__, n, pbat->ref);
        }
        kfree(pbat);
    }
    kfree(pc->cache);
    pc->cache = NULL;
    pc->len = 0;
    pc->params = NULL;
}
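
/*
 * Look up (or create) the cache entry for @zone and take a reference.
 * The search prefers an entry already holding the zone, then a
 * never-used entry, then any idle entry to recycle; if all entries are
 * busy, the cache is doubled.  The zone's saved iostate is restored
 * into the entry before it is returned.
 */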
struct pbat*
pbatcache_get(struct pbatcache* pc, u32 zone)
{
    unsigned int n;
    struct pbat* pbat;

    mutex_lock(&pc->lock);

    /* First pass: the zone may already be cached. */
    for (n = 0; n < pc->len; ++n) {
        pbat = pc->cache[n];
        mutex_lock(&pbat->reflock);
        if (pbat->zone == zone) {
            if (pbat->ref == 0) {
                goto found;
            }
            ++pbat->ref;
            mutex_unlock(&pbat->reflock);
            goto out;
        }
        mutex_unlock(&pbat->reflock);
    }

    /* Second pass: look for a never-used entry. */
    for (n = 0; n < pc->len; ++n) {
        pbat = pc->cache[n];
        mutex_lock(&pbat->reflock);
        if (pbat->zone == ZONE_NONE) {
            goto found;
        }
        mutex_unlock(&pbat->reflock);
    }

    /* Third pass: recycle any idle, non-errored entry. */
    for (n = 0; n < pc->len; ++n) {
        pbat = pc->cache[n];
        mutex_lock(&pbat->reflock);
        if (pbat->ref == 0 && pbat->state != CACHE_STATE_ERROR) {
            goto found;
        }
        mutex_unlock(&pbat->reflock);
    }

    /* All entries busy: double the cache and take the first new entry. */
    n = pc->len;
    if (!pbatcache_realloc(pc, pc->len * 2)) {
        printk(KERN_ERR "%s: realloc failed\n", __func__);
        pbat = NULL;
        goto out;
    }
    pbat = pc->cache[n];
    mutex_lock(&pbat->reflock);

found:
    pbat_reset(pbat, zone);
    pbat->ref = 1;
    memcpy(&pbat->lrs, &pc->lrsv[zone], sizeof(struct iostate));
    memcpy(&pbat->lws, &pc->lwsv[zone], sizeof(struct iostate));
    mutex_unlock(&pbat->reflock);

out:
    mutex_unlock(&pc->lock);
    return pbat;
}
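
/*
 * Drop a reference.  When the last reference is released the entry is
 * flushed and its iostate is copied back to the per-zone vectors, to
 * be restored by the next pbatcache_get() for the zone.
 */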
int
pbatcache_put(struct pbatcache* pc, struct pbat* pbat)
{
    int ret = 0;

    if (!pbat) {
        return 0;
    }
    mutex_lock(&pc->lock);
    mutex_lock(&pbat->reflock);
    if (--pbat->ref == 0) {
        ret = pbat_flush(pbat);
        if (ret) {
            printk(KERN_ERR "%s: pbat_flush failed\n", __func__);
        }
        memcpy(&pc->lrsv[pbat->zone], &pbat->lrs, sizeof(struct iostate));
        memcpy(&pc->lwsv[pbat->zone], &pbat->lws, sizeof(struct iostate));
    }
    mutex_unlock(&pbat->reflock);
    mutex_unlock(&pc->lock);

    return ret;
}