From: Nitin Gupta on
Pages are now compressed using the LZO compression algorithm,
and a new statistic is exported through sysfs:

/sys/kernel/mm/zcache/pool<id>/compr_data_size

This gives the compressed size of the pages stored. So, we can
compute the compression ratio from this and the orig_data_size
statistic, which is already exported.

We only keep pages which compress to less than or equal to
PAGE_SIZE/2. However, we still allocate full pages to store
compressed chunks.

Another change is to enforce memlimit against the compressed
size instead of pages_stored (uncompressed size).

Signed-off-by: Nitin Gupta <ngupta(a)vflare.org>
---
drivers/staging/zram/zcache_drv.c | 254 +++++++++++++++++++++++++-----------
drivers/staging/zram/zcache_drv.h | 7 +-
2 files changed, 181 insertions(+), 80 deletions(-)

diff --git a/drivers/staging/zram/zcache_drv.c b/drivers/staging/zram/zcache_drv.c
index 3ea45a6..2a02606 100644
--- a/drivers/staging/zram/zcache_drv.c
+++ b/drivers/staging/zram/zcache_drv.c
@@ -40,13 +40,18 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cleancache.h>
+#include <linux/cpu.h>
#include <linux/highmem.h>
+#include <linux/lzo.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/u64_stats_sync.h>

#include "zcache_drv.h"

+static DEFINE_PER_CPU(unsigned char *, compress_buffer);
+static DEFINE_PER_CPU(unsigned char *, compress_workmem);
+
/*
* For zero-filled pages, we directly insert 'index' value
* in corresponding radix node. These defines make sure we
@@ -96,7 +101,6 @@ static void zcache_add_stat(struct zcache_pool *zpool,
stats->count[idx] += val;
u64_stats_update_end(&stats->syncp);
preempt_enable();
-
}

static void zcache_inc_stat(struct zcache_pool *zpool,
@@ -442,11 +446,20 @@ static void *zcache_index_to_ptr(unsigned long index)
}

/*
- * Returns index value encoded in the given radix node pointer.
+ * Radix node contains "pointer" value which encode <page, offset>
+ * pair, locating the compressed object. Header of the object then
+ * contains corresponding 'index' value.
*/
-static unsigned long zcache_ptr_to_index(void *ptr)
+static unsigned long zcache_ptr_to_index(struct page *page)
{
- return (unsigned long)(ptr) >> ZCACHE_ZERO_PAGE_INDEX_SHIFT;
+ unsigned long index;
+
+ if (zcache_is_zero_page(page))
+ index = (unsigned long)(page) >> ZCACHE_ZERO_PAGE_INDEX_SHIFT;
+ else
+ index = page->index;
+
+ return index;
}

void zcache_free_page(struct zcache_pool *zpool, struct page *page)
@@ -457,8 +470,12 @@ void zcache_free_page(struct zcache_pool *zpool, struct page *page)
return;

is_zero = zcache_is_zero_page(page);
- if (!is_zero)
+ if (!is_zero) {
+ int clen = page->private;
+
+ zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE, -clen);
__free_page(page);
+ }

zcache_dec_pages(zpool, is_zero);
}
@@ -474,9 +491,12 @@ static int zcache_store_page(struct zcache_inode_rb *znode,
pgoff_t index, struct page *page, int is_zero)
{
int ret;
+ size_t clen;
unsigned long flags;
struct page *zpage;
- void *src_data, *dest_data;
+ unsigned char *zbuffer, *zworkmem;
+ unsigned char *src_data, *dest_data;
+ struct zcache_pool *zpool = znode->pool;

if (is_zero) {
zpage = zcache_index_to_ptr(index);
@@ -488,13 +508,33 @@ static int zcache_store_page(struct zcache_inode_rb *znode,
ret = -ENOMEM;
goto out;
}
- zpage->index = index;
+
+ preempt_disable();
+ zbuffer = __get_cpu_var(compress_buffer);
+ zworkmem = __get_cpu_var(compress_workmem);
+ if (unlikely(!zbuffer || !zworkmem)) {
+ ret = -EFAULT;
+ preempt_enable();
+ goto out;
+ }

src_data = kmap_atomic(page, KM_USER0);
- dest_data = kmap_atomic(zpage, KM_USER1);
- memcpy(dest_data, src_data, PAGE_SIZE);
+ ret = lzo1x_1_compress(src_data, PAGE_SIZE, zbuffer, &clen, zworkmem);
kunmap_atomic(src_data, KM_USER0);
- kunmap_atomic(dest_data, KM_USER1);
+
+ if (unlikely(ret != LZO_E_OK) || clen > zcache_max_page_size) {
+ ret = -EINVAL;
+ preempt_enable();
+ goto out;
+ }
+
+ dest_data = kmap_atomic(zpage, KM_USER0);
+ memcpy(dest_data, zbuffer, clen);
+ kunmap_atomic(dest_data, KM_USER0);
+ preempt_enable();
+
+ zpage->index = index;
+ zpage->private = clen;

out_store:
spin_lock_irqsave(&znode->tree_lock, flags);
@@ -505,11 +545,19 @@ out_store:
__free_page(zpage);
goto out;
}
- if (!is_zero)
+ if (is_zero) {
+ zcache_inc_stat(zpool, ZPOOL_STAT_PAGES_ZERO);
+ } else {
+ int delta = zcache_max_page_size - clen;
+ zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE, -delta);
+ zcache_inc_stat(zpool, ZPOOL_STAT_PAGES_STORED);
radix_tree_tag_set(&znode->page_tree, index,
ZCACHE_TAG_NONZERO_PAGE);
+ }
spin_unlock_irqrestore(&znode->tree_lock, flags);

+ ret = 0; /* success */
+
out:
return ret;
}
@@ -525,42 +573,6 @@ out:
*/
#define FREE_BATCH 16
static void zcache_free_inode_pages(struct zcache_inode_rb *znode,
- u32 pages_to_free)
-{
- int count;
- unsigned long index = 0;
- struct zcache_pool *zpool = znode->pool;
-
- do {
- int i;
- struct page *pages[FREE_BATCH];
-
- count = radix_tree_gang_lookup(&znode->page_tree,
- (void **)pages, index, FREE_BATCH);
- if (count > pages_to_free)
- count = pages_to_free;
-
- for (i = 0; i < count; i++) {
- if (zcache_is_zero_page(pages[i]))
- index = zcache_ptr_to_index(pages[i]);
- else
- index = pages[i]->index;
- radix_tree_delete(&znode->page_tree, index);
- zcache_free_page(zpool, pages[i]);
- }
-
- index++;
- pages_to_free -= count;
- } while (pages_to_free && (count == FREE_BATCH));
-}
-
-/*
- * Same as the previous function except that we only look for
- * pages with the given tag set.
- *
- * Called under zcache_inode_rb->tree_lock
- */
-static void zcache_free_inode_pages_tag(struct zcache_inode_rb *znode,
u32 pages_to_free, enum zcache_tag tag)
{
int count;
@@ -569,17 +581,26 @@ static void zcache_free_inode_pages_tag(struct zcache_inode_rb *znode,

do {
int i;
- struct page *pages[FREE_BATCH];
+ void *objs[FREE_BATCH];
+
+ if (tag == ZCACHE_TAG_INVALID)
+ count = radix_tree_gang_lookup(&znode->page_tree,
+ objs, index, FREE_BATCH);
+ else
+ count = radix_tree_gang_lookup_tag(&znode->page_tree,
+ objs, index, FREE_BATCH, tag);

- count = radix_tree_gang_lookup_tag(&znode->page_tree,
- (void **)pages, index, FREE_BATCH, tag);
if (count > pages_to_free)
count = pages_to_free;

for (i = 0; i < count; i++) {
- index = pages[i]->index;
- zcache_free_page(zpool, pages[i]);
- radix_tree_delete(&znode->page_tree, index);
+ void *obj;
+ unsigned long index;
+
+ index = zcache_ptr_to_index(objs[i]);
+ obj = radix_tree_delete(&znode->page_tree, index);
+ BUG_ON(obj != objs[i]);
+ zcache_free_page(zpool, obj);
}

index++;
@@ -663,7 +684,7 @@ static void zcache_shrink_pool(struct zcache_pool *zpool)

spin_lock(&znode->tree_lock);
/* Free 'pages_to_free' non-zero pages in the current node */
- zcache_free_inode_pages_tag(znode, pages_to_free,
+ zcache_free_inode_pages(znode, pages_to_free,
ZCACHE_TAG_NONZERO_PAGE);
if (zcache_inode_is_empty(znode))
zcache_inode_isolate(znode);
@@ -721,6 +742,16 @@ static ssize_t orig_data_size_show(struct kobject *kobj,
}
ZCACHE_POOL_ATTR_RO(orig_data_size);

+static ssize_t compr_data_size_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct zcache_pool *zpool = zcache_kobj_to_pool(kobj);
+
+ return sprintf(buf, "%llu\n", zcache_get_stat(
+ zpool, ZPOOL_STAT_COMPR_SIZE));
+}
+ZCACHE_POOL_ATTR_RO(compr_data_size);
+
static void memlimit_sysfs_common(struct kobject *kobj, u64 *value, int store)
{
struct zcache_pool *zpool = zcache_kobj_to_pool(kobj);
@@ -763,6 +794,7 @@ ZCACHE_POOL_ATTR(memlimit);
static struct attribute *zcache_pool_attrs[] = {
&zero_pages_attr.attr,
&orig_data_size_attr.attr,
+ &compr_data_size_attr.attr,
&memlimit_attr.attr,
NULL,
};
@@ -867,21 +899,25 @@ static int zcache_init_shared_fs(char *uuid, size_t pagesize)
* If found, copies it to the given output page 'page' and frees
* zcache copy of the same.
*
- * Returns 0 if requested page found, -1 otherwise.
+ * Returns 0 on success, negative error code on failure.
*/
static int zcache_get_page(int pool_id, ino_t inode_no,
pgoff_t index, struct page *page)
{
int ret = -1;
+ size_t clen;
unsigned long flags;
struct page *src_page;
- void *src_data, *dest_data;
+ unsigned char *src_data, *dest_data;
+
struct zcache_inode_rb *znode;
struct zcache_pool *zpool = zcache->pools[pool_id];

znode = zcache_find_inode(zpool, inode_no);
- if (!znode)
+ if (!znode) {
+ ret = -EFAULT;
goto out;
+ }

BUG_ON(znode->inode_no != inode_no);

@@ -893,20 +929,30 @@ static int zcache_get_page(int pool_id, ino_t inode_no,

kref_put(&znode->refcount, zcache_inode_release);

- if (!src_page)
+ if (!src_page) {
+ ret = -EFAULT;
goto out;
+ }

if (zcache_is_zero_page(src_page)) {
zcache_handle_zero_page(page);
goto out_free;
}

+ clen = PAGE_SIZE;
src_data = kmap_atomic(src_page, KM_USER0);
dest_data = kmap_atomic(page, KM_USER1);
- memcpy(dest_data, src_data, PAGE_SIZE);
+
+ ret = lzo1x_decompress_safe(src_data, src_page->private,
+ dest_data, &clen);
+
kunmap_atomic(src_data, KM_USER0);
kunmap_atomic(dest_data, KM_USER1);

+ /* Failure here means bug in LZO! */
+ if (unlikely(ret != LZO_E_OK))
+ goto out_free;
+
flush_dcache_page(page);

out_free:
@@ -942,18 +988,16 @@ static void zcache_put_page(int pool_id, ino_t inode_no,
void *src_data = kmap_atomic(page, KM_USER0);
is_zero = zcache_page_zero_filled(src_data);
kunmap_atomic(src_data, KM_USER0);
- if (is_zero) {
- zcache_inc_stat(zpool, ZPOOL_STAT_PAGES_ZERO);
+ if (is_zero)
goto out_find_store;
- }

/*
- * Incrementing local pages_stored before summing it from
+ * Incrementing local compr_size before summing it from
* all CPUs makes sure we do not end up storing pages in
* excess of memlimit. In case of failure, we revert back
* this local increment.
*/
- zcache_inc_stat(zpool, ZPOOL_STAT_PAGES_STORED);
+ zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE, zcache_max_page_size);

/*
* memlimit can be changed any time by user using sysfs. If
@@ -961,9 +1005,10 @@ static void zcache_put_page(int pool_id, ino_t inode_no,
* stored, then excess pages are freed synchronously when this
* sysfs event occurs.
*/
- if (zcache_get_stat(zpool, ZPOOL_STAT_PAGES_STORED) >
- zcache_get_memlimit(zpool) >> PAGE_SHIFT) {
- zcache_dec_stat(zpool, ZPOOL_STAT_PAGES_STORED);
+ if (zcache_get_stat(zpool, ZPOOL_STAT_COMPR_SIZE) >
+ zcache_get_memlimit(zpool)) {
+ zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE,
+ -zcache_max_page_size);
return;
}

@@ -972,7 +1017,8 @@ out_find_store:
if (!znode) {
znode = zcache_inode_create(pool_id, inode_no);
if (unlikely(!znode)) {
- zcache_dec_pages(zpool, is_zero);
+ zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE,
+ -zcache_max_page_size);
return;
}
}
@@ -985,9 +1031,9 @@ out_find_store:
zcache_free_page(zpool, zpage);

ret = zcache_store_page(znode, index, page, is_zero);
- if (ret) { /* failure */
- zcache_dec_pages(zpool, is_zero);
-
+ if (unlikely(ret)) {
+ zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE,
+ -zcache_max_page_size);
/*
* Its possible that racing zcache get/flush could not
* isolate this node since we held a reference to it.
@@ -1046,7 +1092,7 @@ static void zcache_flush_inode(int pool_id, ino_t inode_no)
return;

spin_lock_irqsave(&znode->tree_lock, flags);
- zcache_free_inode_pages(znode, UINT_MAX);
+ zcache_free_inode_pages(znode, UINT_MAX, ZCACHE_TAG_INVALID);
if (zcache_inode_is_empty(znode))
zcache_inode_isolate(znode);
spin_unlock_irqrestore(&znode->tree_lock, flags);
@@ -1080,7 +1126,7 @@ static void zcache_flush_fs(int pool_id)
while (node) {
znode = rb_entry(node, struct zcache_inode_rb, rb_node);
node = rb_next(node);
- zcache_free_inode_pages(znode, UINT_MAX);
+ zcache_free_inode_pages(znode, UINT_MAX, ZCACHE_TAG_INVALID);
rb_erase(&znode->rb_node, &zpool->inode_tree);
kfree(znode);
}
@@ -1088,8 +1134,47 @@ static void zcache_flush_fs(int pool_id)
zcache_destroy_pool(zpool);
}

+/*
+ * Callback for CPU hotplug events. Allocates the percpu compression
+ * buffer and LZO work memory on CPU_UP_PREPARE and frees both on
+ * CPU_DEAD/CPU_UP_CANCELED. Always returns NOTIFY_OK.
+ */
+static int zcache_cpu_notify(struct notifier_block *nb, unsigned long action,
+ void *pcpu)
+{
+ int cpu = (long)pcpu;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ per_cpu(compress_buffer, cpu) = (void *)__get_free_pages(
+ GFP_KERNEL | __GFP_ZERO, 1);
+ per_cpu(compress_workmem, cpu) = kzalloc(
+ LZO1X_MEM_COMPRESS, GFP_KERNEL);
+ break;
+ case CPU_DEAD:
+ case CPU_UP_CANCELED:
+ free_pages((unsigned long)(per_cpu(compress_buffer, cpu)), 1);
+ per_cpu(compress_buffer, cpu) = NULL;
+ /* compress_workmem comes from kzalloc(), so release it with kfree() */
+ kfree(per_cpu(compress_workmem, cpu));
+ per_cpu(compress_workmem, cpu) = NULL;
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block zcache_cpu_nb = {
+ .notifier_call = zcache_cpu_notify
+};
+
static int __init zcache_init(void)
{
+ int ret = -ENOMEM;
+ unsigned int cpu;
+
struct cleancache_ops ops = {
.init_fs = zcache_init_fs,
.init_shared_fs = zcache_init_shared_fs,
@@ -1102,20 +1187,33 @@ static int __init zcache_init(void)

zcache = kzalloc(sizeof(*zcache), GFP_KERNEL);
if (!zcache)
- return -ENOMEM;
+ goto out;
+
+ ret = register_cpu_notifier(&zcache_cpu_nb);
+ if (ret)
+ goto out;
+
+ for_each_online_cpu(cpu) {
+ void *pcpu = (void *)(long)cpu;
+ zcache_cpu_notify(&zcache_cpu_nb, CPU_UP_PREPARE, pcpu);
+ }

#ifdef CONFIG_SYSFS
/* Create /sys/kernel/mm/zcache/ */
zcache->kobj = kobject_create_and_add("zcache", mm_kobj);
- if (!zcache->kobj) {
- kfree(zcache);
- return -ENOMEM;
- }
+ if (!zcache->kobj)
+ goto out;
#endif

spin_lock_init(&zcache->pool_lock);
cleancache_ops = ops;

+ ret = 0; /* success */
+
+out:
+ if (ret)
+ kfree(zcache);
+
return 0;
}

diff --git a/drivers/staging/zram/zcache_drv.h b/drivers/staging/zram/zcache_drv.h
index 1e8c931..9ce97da 100644
--- a/drivers/staging/zram/zcache_drv.h
+++ b/drivers/staging/zram/zcache_drv.h
@@ -24,20 +24,22 @@
enum zcache_pool_stats_index {
ZPOOL_STAT_PAGES_ZERO,
ZPOOL_STAT_PAGES_STORED,
+ ZPOOL_STAT_COMPR_SIZE,
ZPOOL_STAT_NSTATS,
};

/* Radix-tree tags */
enum zcache_tag {
ZCACHE_TAG_NONZERO_PAGE,
- ZCACHE_TAG_UNUSED
+ ZCACHE_TAG_UNUSED,
+ ZCACHE_TAG_INVALID
};

/* Default zcache per-pool memlimit: 10% of total RAM */
static const unsigned zcache_pool_default_memlimit_perc_ram = 10;

/* We only keep pages that compress to less than this size */
-static const unsigned zcache_max_page_size = PAGE_SIZE / 2;
+static const int zcache_max_page_size = PAGE_SIZE / 2;

/* Red-Black tree node. Maps inode to its page-tree */
struct zcache_inode_rb {
@@ -61,6 +63,7 @@ struct zcache_pool {

seqlock_t memlimit_lock; /* protects memlimit */
u64 memlimit; /* bytes */
+
struct zcache_pool_stats_cpu *stats; /* percpu stats */
#ifdef CONFIG_SYSFS
unsigned char name[MAX_ZPOOL_NAME_LEN];
--
1.7.1.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/