From: Changli Gao on
On Thu, May 6, 2010 at 8:30 AM, Changli Gao <xiaosuo(a)gmail.com> wrote:
> void kvfree(void *ptr, size_t size)
> {
>        if (size < PAGE_SIZE)
>                kfree(ptr);
>        else if (is_vmalloc_addr(ptr))
>                vfree(ptr);
>        else
>                free_pages_exact(ptr, size);
> }

I found a way to eliminate the size argument of kvfree() and
kvfree_inatomic(). We can check where the page_head is slab or
compound, as pages owned by slab subsystem always have Slab flag set
or are compound.

page = virt_to_head_page(ptr);
if (PageSlab(page) || PageCompound(page))
kfree(ptr);
else
free_pages_exact(ptr, page->private);

And we can save the size argument in page->private after
alloc_pages_eact() returns.

ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
if (ptr != NULL) {
virt_to_head_page(ptr)->private = size;
return ptr;
}

here is the test code:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/interrupt.h>

#define kmalloc(size, gfp) ({ \
void *ptr = kmalloc(size, gfp); \
if (ptr != NULL) \
printk("kmalloc: %p(%u)\n", ptr, size); \
ptr; })

#define alloc_pages_exact(size, gfp) ({ \
void *ptr = alloc_pages_exact(size, gfp); \
if (ptr != NULL) \
printk("alloc_pages_exact: %p(%u)\n", ptr, size); \
ptr; })

#define vmalloc(size) ({ \
void *ptr = vmalloc(size); \
if (ptr != NULL) \
printk("vmalloc: %p(%u)\n", ptr, size); \
ptr; })

#define kfree(ptr) do { printk("kfree: %p\n", ptr); kfree(ptr); } while (0)
#define vfree(ptr) do { printk("vfree: %p\n", ptr); vfree(ptr); } while (0)
#define free_pages_exact(ptr, size) do { \
printk("free_pages_exact: %p(%u)\n", ptr, size); \
free_pages_exact(ptr, size); \
} while (0)

void *kvmalloc(size_t size)
{
void *ptr;

if (size < PAGE_SIZE)
return kmalloc(size, GFP_KERNEL);
ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
if (ptr != NULL) {
virt_to_head_page(ptr)->private = size;
return ptr;
}

return vmalloc(size);
}
EXPORT_SYMBOL(kvmalloc);

static void kvfree_work(struct work_struct *work)
{
vfree(work);
}

static void __kvfree(void *ptr, bool inatomic)
{
if (unlikely(ZERO_OR_NULL_PTR(ptr)))
return;
if (is_vmalloc_addr(ptr)) {
if (inatomic) {
struct work_struct *work;

work = ptr;
BUILD_BUG_ON(sizeof(struct work_struct) > PAGE_SIZE);
INIT_WORK(work, kvfree_work);
schedule_work(work);
} else {
vfree(ptr);
}
} else {
struct page *page;

page = virt_to_head_page(ptr);
if (PageSlab(page) || PageCompound(page))
kfree(ptr);
else
free_pages_exact(ptr, page->private);
}
}

void kvfree(void *ptr)
{
__kvfree(ptr, false);
}
EXPORT_SYMBOL(kvfree);

void kvfree_inatomic(void *ptr)
{
__kvfree(ptr, true);
}
EXPORT_SYMBOL(kvfree_inatomic);

//--------------------
// for testing
static int test_init(void)
{
int size;
void *ptr;

for (size = 1; size < (1<<30); size <<= 1) {
ptr = kvmalloc(size);
if (ptr == NULL)
return -1;
if (is_vmalloc_addr(ptr)) {
kvfree(ptr);
break;
}
kvfree(ptr);
}
ptr = kvmalloc(size);
if (ptr == NULL)
return -1;
kvfree_inatomic(ptr);

return 0;
}
module_init(test_init);

static void test_exit(void)
{
}
module_exit(test_exit);

MODULE_LICENSE("GPL");

--
Regards,
Changli Gao(xiaosuo(a)gmail.com)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Tetsuo Handa on
Changli Gao wrote:
> static void __kvfree(void *ptr, bool inatomic)

inatomic might be confusing because what vfree() checks is
BUG_ON(in_interrupt()) rather than BUG_ON(in_atomic()).

> {
> if (unlikely(ZERO_OR_NULL_PTR(ptr)))
> return;
> if (is_vmalloc_addr(ptr)) {
> if (inatomic) {

By the way, is in_interrupt() a heavy operation?

register unsigned long current_stack_pointer asm("esp") __used;
static inline struct thread_info *current_thread_info(void)
{
return (struct thread_info *)
(current_stack_pointer & ~(THREAD_SIZE - 1));
}
#define preempt_count() (current_thread_info()->preempt_count)
#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK | NMI_MASK))
#define in_interrupt() (irq_count())

If we can agree on changing from (inatomic) to (in_interrupt()),
we can merge kvfree() and kvfree_inatomic().

> struct work_struct *work;
>
> work = ptr;
> BUILD_BUG_ON(sizeof(struct work_struct) > PAGE_SIZE);
> INIT_WORK(work, kvfree_work);
> schedule_work(work);
> } else {
> vfree(ptr);
> }
> } else {
> struct page *page;
>
> page = virt_to_head_page(ptr);
> if (PageSlab(page) || PageCompound(page))
> kfree(ptr);
> else
> free_pages_exact(ptr, page->private);
> }
> }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Peter Zijlstra on
On Fri, 2010-05-07 at 21:42 +0900, Tetsuo Handa wrote:
> Changli Gao wrote:
> > static void __kvfree(void *ptr, bool inatomic)
>
> inatomic might be confusing because what vfree() checks is
> BUG_ON(in_interrupt()) rather than BUG_ON(in_atomic()).
>
> > {
> > if (unlikely(ZERO_OR_NULL_PTR(ptr)))
> > return;
> > if (is_vmalloc_addr(ptr)) {
> > if (inatomic) {
>
> By the way, is in_interrupt() a heavy operation?
>
> register unsigned long current_stack_pointer asm("esp") __used;
> static inline struct thread_info *current_thread_info(void)
> {
> return (struct thread_info *)
> (current_stack_pointer & ~(THREAD_SIZE - 1));
> }
> #define preempt_count() (current_thread_info()->preempt_count)
> #define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK | NMI_MASK))
> #define in_interrupt() (irq_count())
>
> If we can agree on changing from (inatomic) to (in_interrupt()),
> we can merge kvfree() and kvfree_inatomic().

I really dislike all of that, just don't allow usage from interrupt
context.



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: KOSAKI Motohiro on
Hi

> void *kvmalloc(size_t size)
> {
> void *ptr;
>
> if (size < PAGE_SIZE)
> return kmalloc(PAGE_SIZE, GFP_KERNEL);
> ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);

low order GFP_KERNEL allocation never fail. then, this doesn't works
as you expected.

> if (ptr != NULL)
> return ptr;
>
> return vmalloc(size);

On x86, vmalloc area is only 128MB address space. it is very rare
resource than physical ram. vmalloc fallback is not good idea.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Jiri Slaby on
On 05/13/2010 06:45 AM, KOSAKI Motohiro wrote:
> Hi
>
>> void *kvmalloc(size_t size)
>> {
>> void *ptr;
>>
>> if (size < PAGE_SIZE)
>> return kmalloc(PAGE_SIZE, GFP_KERNEL);
>> ptr = alloc_pages_exact(size, GFP_KERNEL | __GFP_NOWARN);
>
> low order GFP_KERNEL allocation never fail. then, this doesn't works
> as you expected.

Hi, I suppose you mean the kmalloc allocation -- so kmalloc should fail
iff alloc_pages_exact (unless somebody frees a heap of memory indeed)?

>> if (ptr != NULL)
>> return ptr;
>>
>> return vmalloc(size);
>
> On x86, vmalloc area is only 128MB address space. it is very rare
> resource than physical ram. vmalloc fallback is not good idea.

These functions are a replacement for explicit
if (!(x = kmalloc()))
x = vmalloc();
....
if (is_vmalloc(x))
vfree(x);
else
kfree(x);
in the code (like fdtable does this).

The 128M limit on x86_32 for vmalloc is configurable so if drivers in
sum need more on some specific hardware, it can be increased on the
command line (I had to do this on one machine in the past).

Anyway as this is a replacement for explicit tests, it shouldn't change
the behaviour in any way. Obviously when a user doesn't need virtually
contiguous space, he shouldn't use this interface at all.

thanks,
--
js
suse labs
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/