From: Dipankar Sarma on
From: Paul McKenney <paulmck(a)us.ibm.com>

This patch implements a new version of RCU which allows its read-side
critical sections to be preempted. It uses a set of counter pairs
to keep track of the read-side critical sections and flips them
when all tasks exit read-side critical section. The details
of this implementation can be found in this paper -

http://www.rdrop.com/users/paulmck/RCU/OLSrtRCU.2006.08.11a.pdf

This patch was developed as a part of the -rt kernel
development and meant to provide better latencies when
read-side critical sections of RCU don't disable preemption.
As a consequence of keeping track of RCU readers, the readers
have a slight overhead (optimizations in the paper).
This implementation co-exists with the "classic" RCU
implementations and can be switched to at compiler.

Signed-off-by: Paul McKenney <paulmck(a)us.ibm.com>
Signed-off-by: Dipankar Sarma <dipankar(a)in.ibm.com>


include/linux/rcupdate.h | 5
include/linux/rcupreempt.h | 66 ++++++
include/linux/sched.h | 6
kernel/Kconfig.preempt | 37 +++
kernel/Makefile | 4
kernel/rcupreempt.c | 464 +++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 581 insertions(+), 1 deletion(-)

diff -puN include/linux/rcupdate.h~rcu-preempt include/linux/rcupdate.h
--- linux-2.6.18-rc6-mm1-rcu/include/linux/rcupdate.h~rcu-preempt 2006-09-23 10:00:31.000000000 +0530
+++ linux-2.6.18-rc6-mm1-rcu-dipankar/include/linux/rcupdate.h 2006-09-23 10:00:31.000000000 +0530
@@ -41,7 +41,12 @@
#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <linux/seqlock.h>
+#include <linux/config.h>
+#ifdef CONFIG_CLASSIC_RCU
#include <linux/rcuclassic.h>
+#else
+#include <linux/rcupreempt.h>
+#endif

/**
* struct rcu_head - callback structure for use with RCU
diff -puN /dev/null include/linux/rcupreempt.h
--- /dev/null 2006-09-20 22:20:57.873117750 +0530
+++ linux-2.6.18-rc6-mm1-rcu-dipankar/include/linux/rcupreempt.h 2006-09-23 10:00:31.000000000 +0530
@@ -0,0 +1,66 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (RT implementation)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Author: Paul McKenney <paulmck(a)us.ibm.com>
+ *
+ * Based on the original work by Paul McKenney <paul.mckenney(a)us.ibm.com>
+ * and inputs from Rusty Russell, Andrea Arcangeli and Andi Kleen.
+ * Papers:
+ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
+ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * http://lse.sourceforge.net/locking/rcupdate.html
+ *
+ */
+
+#ifndef __LINUX_RCUPREEMPT_H
+#define __LINUX_RCUPREEMPT_H
+
+#ifdef __KERNEL__
+
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/threads.h>
+#include <linux/percpu.h>
+#include <linux/cpumask.h>
+#include <linux/seqlock.h>
+
+#define rcu_qsctr_inc(cpu)
+#define rcu_bh_qsctr_inc(cpu)
+#define call_rcu_bh(head, rcu) call_rcu(head, rcu)
+
+extern void __rcu_read_lock(void);
+extern void __rcu_read_unlock(void);
+extern int rcu_pending(int cpu);
+
+#define __rcu_read_lock_bh() { rcu_read_lock(); local_bh_disable(); }
+#define __rcu_read_unlock_bh() { local_bh_enable(); rcu_read_unlock(); }
+
+#define __rcu_read_lock_nesting() (current->rcu_read_lock_nesting)
+
+extern void __synchronize_sched(void);
+
+extern void __rcu_init(void);
+extern void rcu_check_callbacks(int cpu, int user);
+extern void rcu_restart_cpu(int cpu);
+extern long rcu_batches_completed(void);
+
+#endif /* __KERNEL__ */
+#endif /* __LINUX_RCUPREEMPT_H */
diff -puN include/linux/sched.h~rcu-preempt include/linux/sched.h
--- linux-2.6.18-rc6-mm1-rcu/include/linux/sched.h~rcu-preempt 2006-09-23 10:00:31.000000000 +0530
+++ linux-2.6.18-rc6-mm1-rcu-dipankar/include/linux/sched.h 2006-09-23 10:00:31.000000000 +0530
@@ -826,6 +826,12 @@ struct task_struct {
cpumask_t cpus_allowed;
unsigned int time_slice, first_time_slice;

+#ifdef CONFIG_PREEMPT_RCU
+ int rcu_read_lock_nesting;
+ atomic_t *rcu_flipctr1;
+ atomic_t *rcu_flipctr2;
+#endif
+
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
#endif
diff -puN kernel/Kconfig.preempt~rcu-preempt kernel/Kconfig.preempt
--- linux-2.6.18-rc6-mm1-rcu/kernel/Kconfig.preempt~rcu-preempt 2006-09-23 10:00:31.000000000 +0530
+++ linux-2.6.18-rc6-mm1-rcu-dipankar/kernel/Kconfig.preempt 2006-09-23 10:00:31.000000000 +0530
@@ -63,3 +63,40 @@ config PREEMPT_BKL
Say Y here if you are building a kernel for a desktop system.
Say N if you are unsure.

+choice
+ prompt "RCU implementation type:"
+ default CLASSIC_RCU
+
+config CLASSIC_RCU
+ bool "Classic RCU"
+ help
+ This option selects the classic RCU implementation that is
+ designed for best read-side performance on non-realtime
+ systems.
+
+ Say Y if you are unsure.
+
+config PREEMPT_RCU
+ bool "Preemptible RCU"
+ help
+ This option reduces the latency of the kernel by making certain
+ RCU sections preemptible. Normally RCU code is non-preemptible, if
+ this option is selected then read-only RCU sections become
+ preemptible. This helps latency, but may expose bugs due to
+ now-naive assumptions about each RCU read-side critical section
+ remaining on a given CPU through its execution.
+
+ Say N if you are unsure.
+
+endchoice
+
+config RCU_STATS
+ bool "/proc stats for preemptible RCU read-side critical sections"
+ depends on PREEMPT_RCU
+ default y
+ help
+ This option provides /proc stats to provide debugging info for
+ the preempt