From: KAMEZAWA Hiroyuki on
On Sat, 31 Jul 2010 14:41:26 +1000
Bojan Smojver <bojan(a)rexursive.com> wrote:

> On Sat, 2010-07-31 at 11:33 +1000, Bojan Smojver wrote:
> > I can go back to that easily.
>
> So, here is that whole enchilada one more time (it includes sync_read
> removal patch as well).
>
> I did 3 hibernate/thaw cycles with it. Images varied from about 850 MB,
> 1.1 GB to 750 MB. I was getting 156/141 MB/s, 121/118 MB/s and 141/130
> MB/s speeds. Obviously, these things depend on compression ratios
> achieved etc.
>
> I guess the number of pages (i.e. LZO_UNC_PAGES) could be made
> configurable as well.
>
> PS. Inline, as requested.
>


I'm sorry if I'm missing something.


> + wrk = vmalloc(LZO1X_1_MEM_COMPRESS);
> + if (!wrk) {
> + printk(KERN_ERR "PM: Failed to allocate LZO workspace\n");
> + free_page((unsigned long)page);
> + return -ENOMEM;
> + }
> +
> + unc = vmalloc(LZO_UNC_SIZE);
> + if (!unc) {
> + printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
> + vfree(wrk);
> + free_page((unsigned long)page);
> + return -ENOMEM;
> + }

Now, vmalloc() is used here. Then, the following will happen.

1. vmalloc()
-> vmalloc adds vmap objects and set page table entries.

2. saving image
-> At taking snapshot of memory to the disk, above vmalloc() area is
saved to disk as it is.
....
3. At restore
Because you don't remember which vmalloc() area was used for creating
the snapshot, you can't free it at swsusp_free().

memory leak ?

Thanks,
-Kame

> +
> + cmp = vmalloc(LZO_CMP_SIZE);
> + if (!cmp) {
> + printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
> + vfree(unc);
> + vfree(wrk);
> + free_page((unsigned long)page);
> + return -ENOMEM;
> + }
>
> printk(KERN_INFO "PM: Saving image data pages (%u pages) ... ",
> nr_to_write);
> @@ -382,16 +422,48 @@ static int save_image(struct swap_map_handle *handle,
> bio = NULL;
> do_gettimeofday(&start);
> while (1) {
> - ret = snapshot_read_next(snapshot);
> - if (ret <= 0)
> + for (ul = 0; ul < LZO_UNC_SIZE; ul += PAGE_SIZE) {
> + ret = snapshot_read_next(snapshot);
> + if (ret < 0)
> + goto out_finish;
> +
> + if (ret == 0)
> + break;
> +
> + memcpy(unc + ul, data_of(*snapshot), PAGE_SIZE);
> +
> + if (!(nr_pages % m))
> + printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
> + nr_pages++;
> + }
> +
> + if (ul == 0)
> + break;
> +
> + ret = lzo1x_1_compress(unc, ul, cmp + LZO_HEADER, &cl, wrk);
> + if (ret < 0) {
> + printk(KERN_ERR "PM: LZO compression failed\n");
> break;
> - ret = swap_write_page(handle, data_of(*snapshot), &bio);
> - if (ret)
> + }
> +
> + if (unlikely(cl == 0 || LZO_HEADER + cl > LZO_CMP_SIZE)) {
> + printk(KERN_ERR "PM: Invalid LZO length\n");
> + ret = -1;
> break;
> - if (!(nr_pages % m))
> - printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m);
> - nr_pages++;
> + }
> +
> + *(size_t *)cmp = cl;
> +
> + for (ul = 0; ul < LZO_HEADER + cl; ul += PAGE_SIZE) {
> + memcpy(page, cmp + ul, PAGE_SIZE);
> +
> + ret = swap_write_page(handle, page, &bio);
> + if (ret)
> + goto out_finish;
> + }
> }
> +
> +out_finish:
> err2 = hib_wait_on_bio_chain(&bio);
> do_gettimeofday(&stop);
> if (!ret)
> @@ -401,6 +473,12 @@ static int save_image(struct swap_map_handle *handle,
> else
> printk(KERN_CONT "\n");
> swsusp_show_speed(&start, &stop, nr_to_write, "Wrote");
> +
> + vfree(cmp);
> + vfree(unc);
> + vfree(wrk);
> + free_page((unsigned long)page);
> +
> return ret;
> }
>
> @@ -416,7 +494,8 @@ static int enough_swap(unsigned int nr_pages)
> unsigned int free_swap = count_swap_pages(root_swap, 1);
>
> pr_debug("PM: Free swap pages: %u\n", free_swap);
> - return free_swap > nr_pages + PAGES_FOR_IO;
> + return free_swap >
> + (nr_pages * LZO_CMP_PAGES) / LZO_UNC_PAGES + PAGES_FOR_IO;
> }
>
> /**
> @@ -547,9 +626,30 @@ static int load_image(struct swap_map_handle *handle,
> int error = 0;
> struct timeval start;
> struct timeval stop;
> - struct bio *bio;
> - int err2;
> unsigned nr_pages;
> + size_t ul, cl;
> + unsigned char *unc, *cmp, *page;
> +
> + page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH);
> + if (!page) {
> + printk(KERN_ERR "PM: Failed to allocate LZO page\n");
> + return -ENOMEM;
> + }
> +
> + unc = vmalloc(LZO_UNC_SIZE);
> + if (!unc) {
> + printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n");
> + free_page((unsigned long)page);
> + return -ENOMEM;
> + }
> +
> + cmp = vmalloc(LZO_CMP_SIZE);
> + if (!cmp) {
> + printk(KERN_ERR "PM: Failed to allocate LZO compressed\n");
> + vfree(unc);
> + free_page((unsigned long)page);
> + return -ENOMEM;
> + }
>
> printk(KERN_INFO "PM: Loading image data pages (%u pages) ... ",
> nr_to_read);
> @@ -557,27 +657,60 @@ static int load_image(struct swap_map_handle *handle,
> if (!m)
> m = 1;
> nr_pages = 0;
> - bio = NULL;
> do_gettimeofday(&start);
> +
> + error = snapshot_write_next(snapshot);
> + if (error <= 0)
> + goto out_finish;
> +
> for ( ; ; ) {
> - error = snapshot_write_next(snapshot);
> - if (error <= 0)
> - break;
> - error = swap_read_page(handle, data_of(*snapshot), &bio);
> + error = swap_read_page(handle, page, NULL); /* sync */
> if (error)
> break;
> - if (snapshot->sync_read)
> - error = hib_wait_on_bio_chain(&bio);
> - if (error)
> + memcpy(cmp, page, PAGE_SIZE);
> +
> + cl = *(size_t *)cmp;
> + if (unlikely(cl == 0 || LZO_HEADER + cl > LZO_CMP_SIZE)) {
> + printk(KERN_ERR "PM: Invalid LZO length\n");
> + error = -1;
> + break;
> + }
> +
> + for (ul = PAGE_SIZE; ul < LZO_HEADER + cl; ul += PAGE_SIZE) {
> + error = swap_read_page(handle, page, NULL); /* sync */
> + if (error)
> + goto out_finish;
> + memcpy(cmp + ul, page, PAGE_SIZE);
> + }
> +
> + ul = LZO_UNC_SIZE;
> + error = lzo1x_decompress_safe(cmp + LZO_HEADER, cl, unc, &ul);
> + if (error < 0) {
> + printk(KERN_ERR "PM: LZO decompression failed\n");
> break;
> - if (!(nr_pages % m))
> - printk("\b\b\b\b%3d%%", nr_pages / m);
> - nr_pages++;
> + }
> +
> + if (unlikely(ul == 0 || ul > LZO_UNC_SIZE)) {
> + printk(KERN_ERR "PM: Invalid LZO length\n");
> + error = -1;
> + break;
> + }
> +
> + for (cl = 0; cl < ul; cl += PAGE_SIZE) {
> + memcpy(data_of(*snapshot), unc + cl, PAGE_SIZE);
> +
> + if (!(nr_pages % m))
> + printk("\b\b\b\b%3d%%", nr_pages / m);
> + nr_pages++;
> +
> + error = snapshot_write_next(snapshot);
> + if (error <= 0)
> + goto out_finish;
> + }
> }
> - err2 = hib_wait_on_bio_chain(&bio);
> +
> +out_finish:
> do_gettimeofday(&stop);
> - if (!error)
> - error = err2;
> if (!error) {
> printk("\b\b\b\bdone\n");
> snapshot_write_finalize(snapshot);
> @@ -586,6 +719,11 @@ static int load_image(struct swap_map_handle *handle,
> } else
> printk("\n");
> swsusp_show_speed(&start, &stop, nr_to_read, "Read");
> +
> + vfree(cmp);
> + vfree(unc);
> + free_page((unsigned long)page);
> +
> return error;
> }
>
>
> --
> Bojan
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo(a)vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Bojan Smojver on
On Mon, 2010-08-02 at 09:17 +0900, KAMEZAWA Hiroyuki wrote:
> Now, vmallc() is used here. Then, following will happen.
>
> 1. vmalloc()
> -> vmalloc adds vmap objects and set page table entries.
>
> 2. saving image
> -> At taking snapshot of memory to the disk, above vmalloc() area
> is
> saved to disk as it is.
> ...
> 3. At restore
> Because you dont't remember which vmalloc() area was used for
> creating
> snapshot, you can't free it at swsusp_free().
>
> memory leak ?

To be honest, I'm not sure.

However, I thought that by the time save_image() is called, snapshot has
already been taken, no?
------------------
error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
if (error)
goto Thaw;

if (in_suspend) {
unsigned int flags = 0;

if (hibernation_mode == HIBERNATION_PLATFORM)
flags |= SF_PLATFORM_MODE;
pr_debug("PM: writing image.\n");
error = swsusp_write(flags); <--- this calls save_image()
------------------

So, me thinks that these allocations will not be in the snapshot image.

PS. Take everything I say with a grain (or two) of salt. I'm just a
regular Linux user trying to make my Fedora hibernate/thaw process suck
less.

--
Bojan

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: KAMEZAWA Hiroyuki on
On Mon, 02 Aug 2010 10:54:13 +1000
Bojan Smojver <bojan(a)rexursive.com> wrote:

> On Mon, 2010-08-02 at 09:17 +0900, KAMEZAWA Hiroyuki wrote:
> > Now, vmallc() is used here. Then, following will happen.
> >
> > 1. vmalloc()
> > -> vmalloc adds vmap objects and set page table entries.
> >
> > 2. saving image
> > -> At taking snapshot of memory to the disk, above vmalloc() area
> > is
> > saved to disk as it is.
> > ...
> > 3. At restore
> > Because you dont't remember which vmalloc() area was used for
> > creating
> > snapshot, you can't free it at swsusp_free().
> >
> > memory leak ?
>
> To be honest, I'm not sure.
>
> However, I thought that by the time save_image() is called, snapshot has
> already been taken, no?
> ------------------
> error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
> if (error)
> goto Thaw;
>
> if (in_suspend) {
> unsigned int flags = 0;
>
> if (hibernation_mode == HIBERNATION_PLATFORM)
> flags |= SF_PLATFORM_MODE;
> pr_debug("PM: writing image.\n");
> error = swsusp_write(flags); <--- this calls save_image()
> ------------------
>
> So, me thinks that these allocations will not be in the snapshot image.
>
I'm very new to the snapshot code ... (I'm studying it now because I got a report
that my patch corrupts it.) So, don't trust my words.

Looking into swsusp_write().
==
swsusp_write()
-> save_image()
->
while () {
snapshot_read_next()
swap_write_page()
}
==
This routine writes the buffer obtained from snapshot_read_next() to the disk.

Then, this is what snapshot_read_next() passes back:
==
} else {
struct page *page;

page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
if (PageHighMem(page)) {
/* Highmem pages are copied to the buffer,
* because we can't return with a kmapped
* highmem page (we may not be called again).
*/
void *kaddr;

kaddr = kmap_atomic(page, KM_USER0);
memcpy(buffer, kaddr, PAGE_SIZE);
kunmap_atomic(kaddr, KM_USER0);
handle->buffer = buffer;
} else {
handle->buffer = page_address(page);
}
}
==
The physical memory address of a page to be saved.

So, I thought the "system memory image" itself is not a snapshot; it keeps changing
while it runs. The reason swsusp can avoid a memory leak is that it records which
pages should be freed after resume in the bitmap, which will be saved to the
image header(?). And, even if this snapshot saves the image of the buddy allocator,
the save routine itself uses a fixed buffer which can be freed after restore.

Thanks,
-Kame












--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Bojan Smojver on
On Mon, 2010-08-02 at 10:10 +0900, KAMEZAWA Hiroyuki wrote:
> Why swsusp can avoid memory leak is that it records which
> pages should be freed after resume in the bitmap, which will be saved
> to image header(?)

Right. So, are you saying that all allocations in save_image() should be
done using __get_free_page() or __get_free_pages() and not with
vmalloc()?

--
Bojan

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: KAMEZAWA Hiroyuki on
On Mon, 02 Aug 2010 11:21:08 +1000
Bojan Smojver <bojan(a)rexursive.com> wrote:

> On Mon, 2010-08-02 at 10:10 +0900, KAMEZAWA Hiroyuki wrote:
> > Why swsusp can avoid memory leak is that it records which
> > pages should be freed after resume in the bitmap, which will be saved
> > to image header(?)
>
> Right. So, are you saying that all allocations in save_image() should be
> done using __get_free_page() or __get_free_pages() and not with
> vmalloc()?
>
I'm not saying that, but the following points need to be considered.
(And it would be good to write a "we're safe because..." comment.)

1. Information about pointers used for vmalloc are saved into image.
2. Information(1) is properly recovered after resume and we can free it.
3. No more allocation will happen once we start writing to the disk.

Then, vmalloc() area itself's information will be saved as
"this vmalloc area is used"

and, at resume, recovered as
"this vmalloc area is used"

Then, you can free it because you remember pointers.

Then, you should make

@@ -372,6 +380,38 @@ static int save_image(struct swap_map_handle *handle,
struct bio *bio;
struct timeval start;
struct timeval stop;
+ size_t ul, cl;
+ unsigned char *unc, *cmp, *wrk, *page;

global variables. Because global variables will be saved as they are,
you can find them after resume and free the used vmalloc() buffers.

Maybe freeing it at swsusp_free() will be clean.


Thanks,
-Kame

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/