From: Peter Zijlstra on Wed, 2010-01-06 at 20:07 +0100
On Wed, 2010-01-06 at 13:52 -0500, Trond Myklebust wrote:
> On Wed, 2010-01-06 at 19:37 +0100, Peter Zijlstra wrote:
> > On Wed, 2010-01-06 at 13:26 -0500, Trond Myklebust wrote:
> > > OK. It looks as if the only key to finding out how many unstable writes
> > > we have is to use global_page_state(NR_UNSTABLE_NFS), so we can't
> > > specifically target our own backing-dev.
> >
> > Would be a simple matter of splitting BDI_UNSTABLE out from
> > BDI_RECLAIMABLE, no?
> >
> > Something like
>
> OK. How about if we also add in a bdi->capabilities flag to tell that we
> might have BDI_UNSTABLE? That would allow us to avoid the potentially
> expensive extra calls to bdi_stat() and bdi_stat_sum() for the non-nfs
> case?

The bdi_stat_sum() in the error limit is basically the only such
expensive op, but I suspect we might hit that more than enough. So sure
that sounds like a plan.

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Trond Myklebust on Wed, 2010-01-06 at 14:21 -0500
On Wed, 2010-01-06 at 20:07 +0100, Peter Zijlstra wrote:
> On Wed, 2010-01-06 at 13:52 -0500, Trond Myklebust wrote:
> > On Wed, 2010-01-06 at 19:37 +0100, Peter Zijlstra wrote:
> > > On Wed, 2010-01-06 at 13:26 -0500, Trond Myklebust wrote:
> > > > OK. It looks as if the only key to finding out how many unstable writes
> > > > we have is to use global_page_state(NR_UNSTABLE_NFS), so we can't
> > > > specifically target our own backing-dev.
> > >
> > > Would be a simple matter of splitting BDI_UNSTABLE out from
> > > BDI_RECLAIMABLE, no?
> > >
> > > Something like
> >
> > OK. How about if we also add in a bdi->capabilities flag to tell that we
> > might have BDI_UNSTABLE? That would allow us to avoid the potentially
> > expensive extra calls to bdi_stat() and bdi_stat_sum() for the non-nfs
> > case?
>
> The bdi_stat_sum() in the error limit is basically the only such
> expensive op, but I suspect we might hit that more than enough. So sure
> that sounds like a plan.
>

This should apply on top of your patch....

Cheers
Trond
------------------------------------------------------------------------------------------------
VM: Don't call bdi_stat(BDI_UNSTABLE) on non-nfs backing-devices

From: Trond Myklebust <Trond.Myklebust(a)netapp.com>

Speeds up the accounting in balance_dirty_pages() for non-nfs devices.

Signed-off-by: Trond Myklebust <Trond.Myklebust(a)netapp.com>
---

fs/nfs/client.c | 1 +
include/linux/backing-dev.h | 6 ++++++
mm/page-writeback.c | 16 +++++++++++-----
3 files changed, 18 insertions(+), 5 deletions(-)


diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ee77713..d0b060a 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -890,6 +890,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo *

server->backing_dev_info.name = "nfs";
server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
+ server->backing_dev_info.capabilities |= BDI_CAP_ACCT_UNSTABLE;

if (server->wsize > max_rpc_payload)
server->wsize = max_rpc_payload;
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 42c3e2a..8b45166 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -232,6 +232,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
#define BDI_CAP_EXEC_MAP 0x00000040
#define BDI_CAP_NO_ACCT_WB 0x00000080
#define BDI_CAP_SWAP_BACKED 0x00000100
+#define BDI_CAP_ACCT_UNSTABLE 0x00000200

#define BDI_CAP_VMFLAGS \
(BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
@@ -311,6 +312,11 @@ static inline bool bdi_cap_flush_forker(struct backing_dev_info *bdi)
return bdi == &default_backing_dev_info;
}

+static inline bool bdi_cap_account_unstable(struct backing_dev_info *bdi)
+{
+ return bdi->capabilities & BDI_CAP_ACCT_UNSTABLE;
+}
+
static inline bool mapping_cap_writeback_dirty(struct address_space *mapping)
{
return bdi_cap_writeback_dirty(mapping->backing_dev_info);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index aa26b0f..d90a0db 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -273,8 +273,9 @@ static void clip_bdi_dirty_limit(struct backing_dev_info *bdi,
avail_dirty = 0;

avail_dirty += bdi_stat(bdi, BDI_DIRTY) +
- bdi_stat(bdi, BDI_UNSTABLE) +
bdi_stat(bdi, BDI_WRITEBACK);
+ if (bdi_cap_account_unstable(bdi))
+ avail_dirty += bdi_stat(bdi, BDI_UNSTABLE);

*pbdi_dirty = min(*pbdi_dirty, avail_dirty);
}
@@ -512,8 +513,9 @@ static void balance_dirty_pages(struct address_space *mapping,
nr_unstable_nfs;
nr_writeback = global_page_state(NR_WRITEBACK);

- bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
- bdi_stat(bdi, BDI_UNSTABLE);
+ bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
+ if (bdi_cap_account_unstable(bdi))
+ bdi_nr_reclaimable += bdi_stat(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);

if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
@@ -563,11 +565,15 @@ static void balance_dirty_pages(struct address_space *mapping,
* deltas.
*/
if (bdi_thresh < 2*bdi_stat_error(bdi)) {
- bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY) +
+ bdi_nr_reclaimable = bdi_stat_sum(bdi, BDI_DIRTY);
+ if (bdi_cap_account_unstable(bdi))
+ bdi_nr_reclaimable +=
bdi_stat_sum(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat_sum(bdi, BDI_WRITEBACK);
} else if (bdi_nr_reclaimable) {
- bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY) +
+ bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
+ if (bdi_cap_account_unstable(bdi))
+ bdi_nr_reclaimable +=
bdi_stat(bdi, BDI_UNSTABLE);
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Trond Myklebust on Wed, 2010-01-06 at 14:53
On Wed, 2010-01-06 at 14:21 -0500, Trond Myklebust wrote:
> On Wed, 2010-01-06 at 20:07 +0100, Peter Zijlstra wrote:
> > On Wed, 2010-01-06 at 13:52 -0500, Trond Myklebust wrote:
> > > On Wed, 2010-01-06 at 19:37 +0100, Peter Zijlstra wrote:
> > > > On Wed, 2010-01-06 at 13:26 -0500, Trond Myklebust wrote:
> > > > > OK. It looks as if the only key to finding out how many unstable writes
> > > > > we have is to use global_page_state(NR_UNSTABLE_NFS), so we can't
> > > > > specifically target our own backing-dev.
> > > >
> > > > Would be a simple matter of splitting BDI_UNSTABLE out from
> > > > BDI_RECLAIMABLE, no?
> > > >
> > > > Something like
> > >
> > > OK. How about if we also add in a bdi->capabilities flag to tell that we
> > > might have BDI_UNSTABLE? That would allow us to avoid the potentially
> > > expensive extra calls to bdi_stat() and bdi_stat_sum() for the non-nfs
> > > case?
> >
> > The bdi_stat_sum() in the error limit is basically the only such
> > expensive op, but I suspect we might hit that more than enough. So sure
> > that sounds like a plan.
> >
>
> This should apply on top of your patch....

....and finally, this should convert the previous NFS patch to use the
per-bdi accounting.

Cheers
Trond

--------------------------------------------------------------------------------------
VM: Use per-bdi unstable accounting to improve use of wbc->force_commit

From: Trond Myklebust <Trond.Myklebust(a)netapp.com>

Signed-off-by: Trond Myklebust <Trond.Myklebust(a)netapp.com>
---

mm/page-writeback.c | 13 +++++++------
1 files changed, 7 insertions(+), 6 deletions(-)


diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d90a0db..c537543 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -487,7 +487,6 @@ static void balance_dirty_pages(struct address_space *mapping,
{
long nr_reclaimable, bdi_nr_reclaimable;
long nr_writeback, bdi_nr_writeback;
- long nr_unstable_nfs;
unsigned long background_thresh;
unsigned long dirty_thresh;
unsigned long bdi_thresh;
@@ -504,18 +503,20 @@ static void balance_dirty_pages(struct address_space *mapping,
.nr_to_write = write_chunk,
.range_cyclic = 1,
};
+ long bdi_nr_unstable = 0;

get_dirty_limits(&background_thresh, &dirty_thresh,
&bdi_thresh, bdi);

- nr_unstable_nfs = global_page_state(NR_UNSTABLE_NFS);
nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
- nr_unstable_nfs;
+ global_page_state(NR_UNSTABLE_NFS);
nr_writeback = global_page_state(NR_WRITEBACK);

bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
- if (bdi_cap_account_unstable(bdi))
- bdi_nr_reclaimable += bdi_stat(bdi, BDI_UNSTABLE);
+ if (bdi_cap_account_unstable(bdi)) {
+ bdi_nr_unstable = bdi_stat(bdi, BDI_UNSTABLE);
+ bdi_nr_reclaimable += bdi_nr_unstable;
+ }
bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);

if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
@@ -545,7 +546,7 @@ static void balance_dirty_pages(struct address_space *mapping,
if (bdi_nr_reclaimable > bdi_thresh) {
wbc.force_commit = 0;
/* Force NFS to also free up unstable writes. */
- if (nr_unstable_nfs > nr_reclaimable / 2)
+ if (bdi_nr_unstable > bdi_nr_reclaimable / 2)
wbc.force_commit = 1;

writeback_inodes_wbc(&wbc);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Jan Kara on
On Wed 06-01-10 14:53:14, Trond Myklebust wrote:
> ...and finally, this should convert the previous NFS patch to use the
> per-bdi accounting.
>
> Cheers
> Trond
>
> --------------------------------------------------------------------------------------
> VM: Use per-bdi unstable accounting to improve use of wbc->force_commit
>
> From: Trond Myklebust <Trond.Myklebust(a)netapp.com>
>
> Signed-off-by: Trond Myklebust <Trond.Myklebust(a)netapp.com>
I like this. You can add
Acked-by: Jan Kara <jack(a)suse.cz>
to this patch and to the previous patches adding unstable pages accounting.

Honza
> ---
>
> mm/page-writeback.c | 13 +++++++------
> 1 files changed, 7 insertions(+), 6 deletions(-)
>
>
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index d90a0db..c537543 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -487,7 +487,6 @@ static void balance_dirty_pages(struct address_space *mapping,
> {
> long nr_reclaimable, bdi_nr_reclaimable;
> long nr_writeback, bdi_nr_writeback;
> - long nr_unstable_nfs;
> unsigned long background_thresh;
> unsigned long dirty_thresh;
> unsigned long bdi_thresh;
> @@ -504,18 +503,20 @@ static void balance_dirty_pages(struct address_space *mapping,
> .nr_to_write = write_chunk,
> .range_cyclic = 1,
> };
> + long bdi_nr_unstable = 0;
>
> get_dirty_limits(&background_thresh, &dirty_thresh,
> &bdi_thresh, bdi);
>
> - nr_unstable_nfs = global_page_state(NR_UNSTABLE_NFS);
> nr_reclaimable = global_page_state(NR_FILE_DIRTY) +
> - nr_unstable_nfs;
> + global_page_state(NR_UNSTABLE_NFS);
> nr_writeback = global_page_state(NR_WRITEBACK);
>
> bdi_nr_reclaimable = bdi_stat(bdi, BDI_DIRTY);
> - if (bdi_cap_account_unstable(bdi))
> - bdi_nr_reclaimable += bdi_stat(bdi, BDI_UNSTABLE);
> + if (bdi_cap_account_unstable(bdi)) {
> + bdi_nr_unstable = bdi_stat(bdi, BDI_UNSTABLE);
> + bdi_nr_reclaimable += bdi_nr_unstable;
> + }
> bdi_nr_writeback = bdi_stat(bdi, BDI_WRITEBACK);
>
> if (bdi_nr_reclaimable + bdi_nr_writeback <= bdi_thresh)
> @@ -545,7 +546,7 @@ static void balance_dirty_pages(struct address_space *mapping,
> if (bdi_nr_reclaimable > bdi_thresh) {
> wbc.force_commit = 0;
> /* Force NFS to also free up unstable writes. */
> - if (nr_unstable_nfs > nr_reclaimable / 2)
> + if (bdi_nr_unstable > bdi_nr_reclaimable / 2)
> wbc.force_commit = 1;
>
> writeback_inodes_wbc(&wbc);
>
--
Jan Kara <jack(a)suse.cz>
SUSE Labs, CR
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/