From: Josef Bacik on
On Tue, May 18, 2010 at 05:05:47PM -0400, Josef Bacik wrote:
> In order for AIO to work, we need to implement aio_write. This patch converts
> our btrfs_file_write to btrfs_aio_write. I've tested this with xfstests and
> nothing broke, and the AIO stuff magically started working. Thanks,
>
> Signed-off-by: Josef Bacik <josef(a)redhat.com>
> ---
> fs/btrfs/extent_io.c | 11 ++++-
> fs/btrfs/file.c | 145 +++++++++++++++++++++++--------------------------
> 2 files changed, 78 insertions(+), 78 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index d2d0368..c407f1c 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2020,6 +2020,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
> sector_t sector;
> struct extent_map *em;
> struct block_device *bdev;
> + struct btrfs_ordered_extent *ordered;
> int ret;
> int nr = 0;
> size_t page_offset = 0;
> @@ -2031,7 +2032,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
> set_page_extent_mapped(page);
>
> end = page_end;
> - lock_extent(tree, start, end, GFP_NOFS);
> + while (1) {
> + lock_extent(tree, start, end, GFP_NOFS);
> + ordered = btrfs_lookup_ordered_extent(inode, start);
> + if (!ordered)
> + break;
> + unlock_extent(tree, start, end, GFP_NOFS);
> + btrfs_start_ordered_extent(inode, ordered, 1);
> + btrfs_put_ordered_extent(ordered);
> + }
>
> if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
> char *userpage;
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index dace07b..132bd4c 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -46,32 +46,42 @@
> static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
> int write_bytes,
> struct page **prepared_pages,
> - const char __user *buf)
> + struct iov_iter *i)
> {
> - long page_fault = 0;
> - int i;
> + size_t copied;
> + int pg = 0;
> int offset = pos & (PAGE_CACHE_SIZE - 1);
>
> - for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
> + while (write_bytes > 0) {
> size_t count = min_t(size_t,
> PAGE_CACHE_SIZE - offset, write_bytes);
> - struct page *page = prepared_pages[i];
> - fault_in_pages_readable(buf, count);
> + struct page *page = prepared_pages[pg];
> +again:
> + if (unlikely(iov_iter_fault_in_readable(i, count)))
> + return -EFAULT;
>
> /* Copy data from userspace to the current page */
> - kmap(page);
> - page_fault = __copy_from_user(page_address(page) + offset,
> - buf, count);
> + copied = iov_iter_copy_from_user(page, i, offset, count);
> +
> /* Flush processor's dcache for this page */
> flush_dcache_page(page);
> - kunmap(page);
> - buf += count;
> - write_bytes -= count;
> + iov_iter_advance(i, copied);
> + write_bytes -= copied;
>
> - if (page_fault)
> - break;
> + if (unlikely(copied == 0)) {
> + count = min_t(size_t, PAGE_CACHE_SIZE - offset,
> + iov_iter_single_seg_count(i));
> + goto again;
> + }
> +
> + if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
> + offset += copied;
> + } else {
> + pg++;
> + offset = 0;
> + }
> }
> - return page_fault ? -EFAULT : 0;
> + return 0;
> }
>
> /*
> @@ -823,60 +833,24 @@ again:
> return 0;
> }
>
> -/* Copied from read-write.c */
> -static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
> -{
> - set_current_state(TASK_UNINTERRUPTIBLE);
> - if (!kiocbIsKicked(iocb))
> - schedule();
> - else
> - kiocbClearKicked(iocb);
> - __set_current_state(TASK_RUNNING);
> -}
> -
> -/*
> - * Just a copy of what do_sync_write does.
> - */
> -static ssize_t __btrfs_direct_write(struct file *file, const char __user *buf,
> - size_t count, loff_t pos, loff_t *ppos)
> +static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
> + const struct iovec *iov,
> + unsigned long nr_segs, loff_t pos)
> {
> - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
> - unsigned long nr_segs = 1;
> - struct kiocb kiocb;
> - ssize_t ret;
> -
> - init_sync_kiocb(&kiocb, file);
> - kiocb.ki_pos = pos;
> - kiocb.ki_left = count;
> - kiocb.ki_nbytes = count;
> -
> - while (1) {
> - ret = generic_file_direct_write(&kiocb, &iov, &nr_segs, pos,
> - ppos, count, count);
> - if (ret != -EIOCBRETRY)
> - break;
> - wait_on_retry_sync_kiocb(&kiocb);
> - }
> -
> - if (ret == -EIOCBQUEUED)
> - ret = wait_on_sync_kiocb(&kiocb);
> - *ppos = kiocb.ki_pos;
> - return ret;
> -}
> -
> -static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> - size_t count, loff_t *ppos)
> -{
> - loff_t pos;
> + struct file *file = iocb->ki_filp;
> + struct inode *inode = fdentry(file)->d_inode;
> + struct btrfs_root *root = BTRFS_I(inode)->root;
> + struct page *pinned[2];
> + struct page **pages = NULL;
> + struct iov_iter i;
> + loff_t *ppos = &iocb->ki_pos;
> loff_t start_pos;
> ssize_t num_written = 0;
> ssize_t err = 0;
> + size_t count;
> + size_t ocount;
> int ret = 0;
> - struct inode *inode = fdentry(file)->d_inode;
> - struct btrfs_root *root = BTRFS_I(inode)->root;
> - struct page **pages = NULL;
> int nrptrs;
> - struct page *pinned[2];
> unsigned long first_index;
> unsigned long last_index;
> int will_write;
> @@ -888,7 +862,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> pinned[0] = NULL;
> pinned[1] = NULL;
>
> - pos = *ppos;
> start_pos = pos;
>
> vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
> @@ -902,6 +875,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
>
> mutex_lock(&inode->i_mutex);
>
> + err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
> + if (err)
> + goto out;
> + count = ocount;
> +
> current->backing_dev_info = inode->i_mapping->backing_dev_info;
> err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
> if (err)
> @@ -918,14 +896,28 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> BTRFS_I(inode)->sequence++;
>
> if (unlikely(file->f_flags & O_DIRECT)) {
> - num_written = __btrfs_direct_write(file, buf, count, pos,
> - ppos);
> - pos += num_written;
> - count -= num_written;
> + ret = btrfs_check_data_free_space(root, inode, count);
> + if (ret)
> + goto out;
>
> - /* We've written everything we wanted to, exit */
> - if (num_written < 0 || !count)
> + num_written = generic_file_direct_write(iocb, iov, &nr_segs,
> + pos, ppos, count,
> + ocount);
> +
> + /* All reservations for DIO are done internally */
> + btrfs_free_reserved_data_space(root, inode, count);
> +
> + if (num_written > 0)
> + pos += num_written;
> + count -= num_written;
> +
> + if (num_written < 0) {
> + ret = num_written;
> + num_written = 0;
> goto out;
> + } else if (!count) {
> + goto out;
> + }
>

Hrm, it looks like this part got munged when I did my git rebase. I will fix it
up and resubmit. Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Shi Weihua on
at 2010-5-22 1:03, Josef Bacik wrote:
> In order for AIO to work, we need to implement aio_write. This patch converts
> our btrfs_file_write to btrfs_aio_write. I've tested this with xfstests and
> nothing broke, and the AIO stuff magically started working. Thanks,

But xfstests case 198 (source: src/aio-dio-regress/aiodio_sparse2.c) still fails
with the following output:
--------------------
AIO write offset 0 expected 65536 got -22
AIO write offset 5242880 expected 65536 got -22
AIO write offset 10485760 expected 65536 got -22
AIO write offset 15728640 expected 65536 got -22
AIO write offset 20971520 expected 65536 got -22
AIO write offset 31457280 expected 65536 got -22
AIO write offset 36700160 expected 65536 got -22
AIO write offset 41943040 expected 65536 got -22
AIO write offset 47185920 expected 65536 got -22
AIO write offset 52428800 expected 65536 got -22
AIO write offset 57671680 expected 65536 got -22
AIO write offset 62914560 expected 65536 got -22
AIO write offset 73400320 expected 65536 got -22
AIO write offset 78643200 expected 65536 got -22
non one buffer at buf[0] => 0x00,00,00,00
non-one read at offset 0
*** WARNING *** /tmp/aaaa has not been unlinked; if you don't rm it manually first, it may influence the next run
--------------------

generic_file_direct_write() (called from btrfs_file_aio_write() in fs/btrfs/file.c)
returned -22 (-EINVAL); maybe that is useful for your analysis.

Thanks.

>
> Signed-off-by: Josef Bacik <josef(a)redhat.com>
> ---
> fs/btrfs/extent_io.c | 11 +++-
> fs/btrfs/file.c | 152 +++++++++++++++++++++++---------------------------
> 2 files changed, 80 insertions(+), 83 deletions(-)
>
> diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> index d2d0368..c407f1c 100644
> --- a/fs/btrfs/extent_io.c
> +++ b/fs/btrfs/extent_io.c
> @@ -2020,6 +2020,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
> sector_t sector;
> struct extent_map *em;
> struct block_device *bdev;
> + struct btrfs_ordered_extent *ordered;
> int ret;
> int nr = 0;
> size_t page_offset = 0;
> @@ -2031,7 +2032,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
> set_page_extent_mapped(page);
>
> end = page_end;
> - lock_extent(tree, start, end, GFP_NOFS);
> + while (1) {
> + lock_extent(tree, start, end, GFP_NOFS);
> + ordered = btrfs_lookup_ordered_extent(inode, start);
> + if (!ordered)
> + break;
> + unlock_extent(tree, start, end, GFP_NOFS);
> + btrfs_start_ordered_extent(inode, ordered, 1);
> + btrfs_put_ordered_extent(ordered);
> + }
>
> if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
> char *userpage;
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index dace07b..ce35431 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -46,32 +46,42 @@
> static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
> int write_bytes,
> struct page **prepared_pages,
> - const char __user *buf)
> + struct iov_iter *i)
> {
> - long page_fault = 0;
> - int i;
> + size_t copied;
> + int pg = 0;
> int offset = pos & (PAGE_CACHE_SIZE - 1);
>
> - for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
> + while (write_bytes > 0) {
> size_t count = min_t(size_t,
> PAGE_CACHE_SIZE - offset, write_bytes);
> - struct page *page = prepared_pages[i];
> - fault_in_pages_readable(buf, count);
> + struct page *page = prepared_pages[pg];
> +again:
> + if (unlikely(iov_iter_fault_in_readable(i, count)))
> + return -EFAULT;
>
> /* Copy data from userspace to the current page */
> - kmap(page);
> - page_fault = __copy_from_user(page_address(page) + offset,
> - buf, count);
> + copied = iov_iter_copy_from_user(page, i, offset, count);
> +
> /* Flush processor's dcache for this page */
> flush_dcache_page(page);
> - kunmap(page);
> - buf += count;
> - write_bytes -= count;
> + iov_iter_advance(i, copied);
> + write_bytes -= copied;
>
> - if (page_fault)
> - break;
> + if (unlikely(copied == 0)) {
> + count = min_t(size_t, PAGE_CACHE_SIZE - offset,
> + iov_iter_single_seg_count(i));
> + goto again;
> + }
> +
> + if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
> + offset += copied;
> + } else {
> + pg++;
> + offset = 0;
> + }
> }
> - return page_fault ? -EFAULT : 0;
> + return 0;
> }
>
> /*
> @@ -823,60 +833,24 @@ again:
> return 0;
> }
>
> -/* Copied from read-write.c */
> -static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
> -{
> - set_current_state(TASK_UNINTERRUPTIBLE);
> - if (!kiocbIsKicked(iocb))
> - schedule();
> - else
> - kiocbClearKicked(iocb);
> - __set_current_state(TASK_RUNNING);
> -}
> -
> -/*
> - * Just a copy of what do_sync_write does.
> - */
> -static ssize_t __btrfs_direct_write(struct file *file, const char __user *buf,
> - size_t count, loff_t pos, loff_t *ppos)
> -{
> - struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };
> - unsigned long nr_segs = 1;
> - struct kiocb kiocb;
> - ssize_t ret;
> -
> - init_sync_kiocb(&kiocb, file);
> - kiocb.ki_pos = pos;
> - kiocb.ki_left = count;
> - kiocb.ki_nbytes = count;
> -
> - while (1) {
> - ret = generic_file_direct_write(&kiocb, &iov, &nr_segs, pos,
> - ppos, count, count);
> - if (ret != -EIOCBRETRY)
> - break;
> - wait_on_retry_sync_kiocb(&kiocb);
> - }
> -
> - if (ret == -EIOCBQUEUED)
> - ret = wait_on_sync_kiocb(&kiocb);
> - *ppos = kiocb.ki_pos;
> - return ret;
> -}
> -
> -static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> - size_t count, loff_t *ppos)
> +static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
> + const struct iovec *iov,
> + unsigned long nr_segs, loff_t pos)
> {
> - loff_t pos;
> + struct file *file = iocb->ki_filp;
> + struct inode *inode = fdentry(file)->d_inode;
> + struct btrfs_root *root = BTRFS_I(inode)->root;
> + struct page *pinned[2];
> + struct page **pages = NULL;
> + struct iov_iter i;
> + loff_t *ppos = &iocb->ki_pos;
> loff_t start_pos;
> ssize_t num_written = 0;
> ssize_t err = 0;
> + size_t count;
> + size_t ocount;
> int ret = 0;
> - struct inode *inode = fdentry(file)->d_inode;
> - struct btrfs_root *root = BTRFS_I(inode)->root;
> - struct page **pages = NULL;
> int nrptrs;
> - struct page *pinned[2];
> unsigned long first_index;
> unsigned long last_index;
> int will_write;
> @@ -888,7 +862,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> pinned[0] = NULL;
> pinned[1] = NULL;
>
> - pos = *ppos;
> start_pos = pos;
>
> vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
> @@ -902,6 +875,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
>
> mutex_lock(&inode->i_mutex);
>
> + err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ);
> + if (err)
> + goto out;
> + count = ocount;
> +
> current->backing_dev_info = inode->i_mapping->backing_dev_info;
> err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
> if (err)
> @@ -918,14 +896,24 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> BTRFS_I(inode)->sequence++;
>
> if (unlikely(file->f_flags & O_DIRECT)) {
> - num_written = __btrfs_direct_write(file, buf, count, pos,
> - ppos);
> - pos += num_written;
> - count -= num_written;
> + ret = btrfs_check_data_free_space(root, inode, count);
> + if (ret)
> + goto out;
>
> - /* We've written everything we wanted to, exit */
> - if (num_written < 0 || !count)
> + num_written = generic_file_direct_write(iocb, iov, &nr_segs,
> + pos, ppos, count,
> + ocount);
> +
> + /* All reservations for DIO are done internally */
> + btrfs_free_reserved_data_space(root, inode, count);
> +
> + if (num_written < 0) {
> + ret = num_written;
> + num_written = 0;
> + goto out;
> + } else if (num_written == count) {
> goto out;
> + }
>
> /*
> * We are going to do buffered for the rest of the range, so we
> @@ -933,18 +921,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> * done.
> */
> buffered = 1;
> - buf += num_written;
> + pos += num_written;
> }
>
> - nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE,
> - PAGE_CACHE_SIZE / (sizeof(struct page *)));
> + iov_iter_init(&i, iov, nr_segs, count, num_written);
> + nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) /
> + PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
> + (sizeof(struct page *)));
> pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
>
> /* generic_write_checks can change our pos */
> start_pos = pos;
>
> first_index = pos >> PAGE_CACHE_SHIFT;
> - last_index = (pos + count) >> PAGE_CACHE_SHIFT;
> + last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT;
>
> /*
> * there are lots of better ways to do this, but this code
> @@ -961,7 +951,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> unlock_page(pinned[0]);
> }
> }
> - if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
> + if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) {
> pinned[1] = grab_cache_page(inode->i_mapping, last_index);
> if (!PageUptodate(pinned[1])) {
> ret = btrfs_readpage(NULL, pinned[1]);
> @@ -972,10 +962,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> }
> }
>
> - while (count > 0) {
> + while (iov_iter_count(&i) > 0) {
> size_t offset = pos & (PAGE_CACHE_SIZE - 1);
> - size_t write_bytes = min(count, nrptrs *
> - (size_t)PAGE_CACHE_SIZE -
> + size_t write_bytes = min(iov_iter_count(&i),
> + nrptrs * (size_t)PAGE_CACHE_SIZE -
> offset);
> size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
> PAGE_CACHE_SHIFT;
> @@ -997,7 +987,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> }
>
> ret = btrfs_copy_from_user(pos, num_pages,
> - write_bytes, pages, buf);
> + write_bytes, pages, &i);
> if (ret) {
> btrfs_free_reserved_data_space(root, inode,
> write_bytes);
> @@ -1026,8 +1016,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
> btrfs_throttle(root);
> }
>
> - buf += write_bytes;
> - count -= write_bytes;
> pos += write_bytes;
> num_written += write_bytes;
>
> @@ -1222,7 +1210,7 @@ const struct file_operations btrfs_file_operations = {
> .read = do_sync_read,
> .aio_read = generic_file_aio_read,
> .splice_read = generic_file_splice_read,
> - .write = btrfs_file_write,
> + .aio_write = btrfs_file_aio_write,
> .mmap = btrfs_file_mmap,
> .open = generic_file_open,
> .release = btrfs_release_file,

--
Shi Weihua
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Josef Bacik on
On Sun, May 23, 2010 at 04:31:52PM +0800, Shi Weihua wrote:
> at 2010-5-22 1:03, Josef Bacik wrote:
> > In order for AIO to work, we need to implement aio_write. This patch converts
> > our btrfs_file_write to btrfs_aio_write. I've tested this with xfstests and
> > nothing broke, and the AIO stuff magically started working. Thanks,
>
> But xfstests's case 198(source: src/aio-dio-regress/aiodio_sparse2.c) still failed,
> following message outputted.
> --------------------
> AIO write offset 0 expected 65536 got -22
> AIO write offset 5242880 expected 65536 got -22
> AIO write offset 10485760 expected 65536 got -22
> AIO write offset 15728640 expected 65536 got -22
> AIO write offset 20971520 expected 65536 got -22
> AIO write offset 31457280 expected 65536 got -22
> AIO write offset 36700160 expected 65536 got -22
> AIO write offset 41943040 expected 65536 got -22
> AIO write offset 47185920 expected 65536 got -22
> AIO write offset 52428800 expected 65536 got -22
> AIO write offset 57671680 expected 65536 got -22
> AIO write offset 62914560 expected 65536 got -22
> AIO write offset 73400320 expected 65536 got -22
> AIO write offset 78643200 expected 65536 got -22
> non one buffer at buf[0] => 0x00,00,00,00
> non-one read at offset 0
> *** WARNING *** /tmp/aaaa has not been unlinked; if you don't rm it manually first, it may influence the next run
> --------------------
>
> generic_file_direct_write()(in btrfs_file_aio_write(), fs/btrfs/file.c) returned -22,
> maybe it's useful for your analysing.

Yes, change that test case to run with -a 4096 and it will run fine. Because BTRFS
doesn't pass in a bdev to __blockdev_direct_IO, it doesn't do 512-byte-aligned
IO, just blocksize-aligned IO. I will fix that at some later point, but it's a
little tricky since we have to figure out which bdev has the largest alignment
(in case we have a 4k sector device and a 512 byte sector device in the same
volume). Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: liubo on
On 05/22/2010 01:03 AM, Josef Bacik wrote:
> In order for AIO to work, we need to implement aio_write. This patch converts
> our btrfs_file_write to btrfs_aio_write. I've tested this with xfstests and
> nothing broke, and the AIO stuff magically started working. Thanks,
>
> Signed-off-by: Josef Bacik <josef(a)redhat.com>
>

Hi, Josef,

I've tested your patch (May 22) with my tools, and one case triggered a bug
that makes the writev operation hang. More information follows.

- Steps to trigger it:
# mount /dev/sda8 /home/btrfsdisk -o nodatacow
# gcc direct-io.c -o direct-io
# ./direct-io O_DIRECT writev /home/btrfsdisk/testrw 4M

Then, on another tty, running "dmesg" shows:

[snip]
device fsid f44b0879c75c0e99-1d4b28f2d5c503ae devid 1 transid 11177
/dev/sda8
btrfs: setting nodatacow
INFO: task direct-io:1399 blocked for more than 120 seconds.
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
direct-io D 0000000000000003 0 1399 1341 0x00000000
ffff880137c379c8 0000000000000082 ffff880137c379d8 ffffffff00000000
ffff880137c37fd8 ffff880139730000 0000000000015440 ffff880137c37fd8
0000000000015440 0000000000015440 0000000000015440 0000000000015440
Call Trace:
[<ffffffffa0119d4a>] wait_extent_bit+0xe3/0x163 [btrfs]
[<ffffffff8106651f>] ? autoremove_wake_function+0x0/0x39
[<ffffffffa0119e47>] lock_extent_bits+0x7d/0xa8 [btrfs]
[<ffffffffa0119e88>] lock_extent+0x16/0x18 [btrfs]
[<ffffffffa01025ce>] btrfs_direct_IO+0x8e/0x1be [btrfs]
[<ffffffff810c7301>] generic_file_direct_write+0xed/0x16d
[<ffffffffa010bb91>] btrfs_file_aio_write+0x2af/0x8d2 [btrfs]
[<ffffffff81100eae>] ? try_get_mem_cgroup_from_mm+0x39/0x49
[<ffffffffa010b8e2>] ? btrfs_file_aio_write+0x0/0x8d2 [btrfs]
[<ffffffff811063ed>] do_sync_readv_writev+0xc1/0x100
[<ffffffff81106120>] ? might_fault+0x21/0x23
[<ffffffff81106151>] ? copy_from_user+0x2f/0x31
[<ffffffff811c90ab>] ? security_file_permission+0x16/0x18
[<ffffffff81107145>] do_readv_writev+0xa7/0x127
[<ffffffff81107208>] vfs_writev+0x43/0x4e
[<ffffffff811072f8>] sys_writev+0x4a/0x93
[<ffffffff81009c32>] system_call_fastpath+0x16/0x1b


So, can you figure out if anything in your patch leads to the bug?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: liubo on
On 05/27/2010 11:06 AM, liubo wrote:
> On 05/22/2010 01:03 AM, Josef Bacik wrote:
>
>> In order for AIO to work, we need to implement aio_write. This patch converts
>> our btrfs_file_write to btrfs_aio_write. I've tested this with xfstests and
>> nothing broke, and the AIO stuff magically started working. Thanks,
>>
>> Signed-off-by: Josef Bacik <josef(a)redhat.com>
>>
>>
>
> Hi, Josef,
>
> I've tested your patch(May 22) with my tools, and one case triggered a bug
> which made writev operation hang up, more information is followed.
>
> - Steps to trigger it:
> # mount /dev/sda8 /home/btrfsdisk -o nodatacow
> # gcc direct-io.c -o direct-io
> # ./direct-io O_DIRECT writev /home/btrfsdisk/testrw 4M
>
> then on another tty, after "dmesg"...
>
> [snip]
> device fsid f44b0879c75c0e99-1d4b28f2d5c503ae devid 1 transid 11177
> /dev/sda8
> btrfs: setting nodatacow
> INFO: task direct-io:1399 blocked for more than 120 seconds.
> "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
> direct-io D 0000000000000003 0 1399 1341 0x00000000
> ffff880137c379c8 0000000000000082 ffff880137c379d8 ffffffff00000000
> ffff880137c37fd8 ffff880139730000 0000000000015440 ffff880137c37fd8
> 0000000000015440 0000000000015440 0000000000015440 0000000000015440
> Call Trace:
> [<ffffffffa0119d4a>] wait_extent_bit+0xe3/0x163 [btrfs]
> [<ffffffff8106651f>] ? autoremove_wake_function+0x0/0x39
> [<ffffffffa0119e47>] lock_extent_bits+0x7d/0xa8 [btrfs]
> [<ffffffffa0119e88>] lock_extent+0x16/0x18 [btrfs]
> [<ffffffffa01025ce>] btrfs_direct_IO+0x8e/0x1be [btrfs]
> [<ffffffff810c7301>] generic_file_direct_write+0xed/0x16d
> [<ffffffffa010bb91>] btrfs_file_aio_write+0x2af/0x8d2 [btrfs]
> [<ffffffff81100eae>] ? try_get_mem_cgroup_from_mm+0x39/0x49
> [<ffffffffa010b8e2>] ? btrfs_file_aio_write+0x0/0x8d2 [btrfs]
> [<ffffffff811063ed>] do_sync_readv_writev+0xc1/0x100
> [<ffffffff81106120>] ? might_fault+0x21/0x23
> [<ffffffff81106151>] ? copy_from_user+0x2f/0x31
> [<ffffffff811c90ab>] ? security_file_permission+0x16/0x18
> [<ffffffff81107145>] do_readv_writev+0xa7/0x127
> [<ffffffff81107208>] vfs_writev+0x43/0x4e
> [<ffffffff811072f8>] sys_writev+0x4a/0x93
> [<ffffffff81009c32>] system_call_fastpath+0x16/0x1b
>
>
> So, can you figure out if anything in your patch leads to the bug?
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majordomo(a)vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
>

Sorry, I forgot the attachment...

Please find it attached.

Thanks,

- Liubo