diff -urpNa -X dontdiff linux-2.6.34-rc1/arch/powerpc/platforms/pseries/offline_states.h linux-2.6.34-rc1-tip.007b0924-rcu/arch/powerpc/platforms/pseries/offline_states.h --- linux-2.6.34-rc1/arch/powerpc/platforms/pseries/offline_states.h 2010-02-24 10:52:17.000000000 -0800 +++ linux-2.6.34-rc1-tip.007b0924-rcu/arch/powerpc/platforms/pseries/offline_states.h 2010-03-15 16:32:29.000000000 -0700 @@ -9,10 +9,40 @@ enum cpu_state_vals { CPU_MAX_OFFLINE_STATES }; +#ifdef CONFIG_HOTPLUG_CPU + extern enum cpu_state_vals get_cpu_current_state(int cpu); extern void set_cpu_current_state(int cpu, enum cpu_state_vals state); extern enum cpu_state_vals get_preferred_offline_state(int cpu); extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state); extern void set_default_offline_state(int cpu); + +#else /* #ifdef CONFIG_HOTPLUG_CPU */ + +static inline enum cpu_state_vals get_cpu_current_state(int cpu) +{ + return cpu_online(cpu) ? CPU_STATE_ONLINE : CPU_STATE_OFFLINE; +} + +static inline void set_cpu_current_state(int cpu, enum cpu_state_vals state) +{ +} + +static inline enum cpu_state_vals get_preferred_offline_state(int cpu) +{ + return CPU_STATE_OFFLINE; +} + +static inline void set_preferred_offline_state(int cpu, + enum cpu_state_vals state) +{ +} + +static inline void set_default_offline_state(int cpu) +{ +} + +#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ + extern int start_secondary(void); #endif diff -urpNa -X dontdiff linux-2.6.34-rc1/Documentation/RCU/checklist.txt linux-2.6.34-rc1-tip.007b0924-rcu/Documentation/RCU/checklist.txt --- linux-2.6.34-rc1/Documentation/RCU/checklist.txt 2010-03-15 12:45:49.000000000 -0700 +++ linux-2.6.34-rc1-tip.007b0924-rcu/Documentation/RCU/checklist.txt 2010-04-08 10:01:07.000000000 -0700 @@ -260,7 +260,8 @@ over a rather long period of time, but i The reason that it is permissible to use RCU list-traversal primitives when the update-side lock is held is that doing so can be quite helpful in reducing code bloat when common code is - shared between readers and updaters. + shared between readers and updaters. Additional primitives + are provided for this case, as discussed in lockdep.txt. 10. Conversely, if you are in an RCU read-side critical section, and you don't hold the appropriate update-side lock, you -must- @@ -344,8 +345,8 @@ over a rather long period of time, but i requiring SRCU's read-side deadlock immunity or low read-side realtime latency. - Note that, rcu_assign_pointer() and rcu_dereference() relate to - SRCU just as they do to other forms of RCU. + Note that, rcu_assign_pointer() relates to SRCU just as they do + to other forms of RCU. 15. The whole point of call_rcu(), synchronize_rcu(), and friends is to wait until all pre-existing readers have finished before diff -urpNa -X dontdiff linux-2.6.34-rc1/Documentation/RCU/lockdep.txt linux-2.6.34-rc1-tip.007b0924-rcu/Documentation/RCU/lockdep.txt --- linux-2.6.34-rc1/Documentation/RCU/lockdep.txt 2010-03-15 12:45:49.000000000 -0700 +++ linux-2.6.34-rc1-tip.007b0924-rcu/Documentation/RCU/lockdep.txt 2010-04-08 10:01:07.000000000 -0700 @@ -32,9 +32,20 @@ checking of rcu_dereference() primitives srcu_dereference(p, sp): Check for SRCU read-side critical section. rcu_dereference_check(p, c): - Use explicit check expression "c". + Use explicit check expression "c". This is useful in + code that is invoked by both readers and updaters. rcu_dereference_raw(p) Don't check. (Use sparingly, if at all.) + rcu_dereference_protected(p, c): + Use explicit check expression "c", and omit all barriers + and compiler constraints. This is useful when the data + structure cannot change, for example, in code that is + invoked only by updaters. + rcu_access_pointer(p): + Return the value of the pointer and omit all barriers, + but retain the compiler constraints that prevent duplicating + or coalescsing. This is useful when when testing the + value of the pointer itself, for example, against NULL. The rcu_dereference_check() check expression can be any boolean expression, but would normally include one of the rcu_read_lock_held() @@ -59,7 +70,20 @@ In case (1), the pointer is picked up in RCU read-side critical sections, in case (2) the ->file_lock prevents any change from taking place, and finally, in case (3) the current task is the only task accessing the file_struct, again preventing any change -from taking place. +from taking place. If the above statement was invoked only from updater +code, it could instead be written as follows: + + file = rcu_dereference_protected(fdt->fd[fd], + lockdep_is_held(&files->file_lock) || + atomic_read(&files->count) == 1); + +This would verify cases #2 and #3 above, and furthermore lockdep would +complain if this was used in an RCU read-side critical section unless one +of these two cases held. Because rcu_dereference_protected() omits all +barriers and compiler constraints, it generates better code than do the +other flavors of rcu_dereference(). On the other hand, it is illegal +to use rcu_dereference_protected() if either the RCU-protected pointer +or the RCU-protected data that it points to can change concurrently. There are currently only "universal" versions of the rcu_assign_pointer() and RCU list-/tree-traversal primitives, which do not (yet) check for diff -urpNa -X dontdiff linux-2.6.34-rc1/Documentation/RCU/NMI-RCU.txt linux-2.6.34-rc1-tip.007b0924-rcu/Documentation/RCU/NMI-RCU.txt --- linux-2.6.34-rc1/Documentation/RCU/NMI-RCU.txt 2010-02-24 10:52:17.000000000 -0800 +++ linux-2.6.34-rc1-tip.007b0924-rcu/Documentation/RCU/NMI-RCU.txt 2010-04-08 10:01:07.000000000 -0700 @@ -34,7 +34,7 @@ NMI handler. cpu = smp_processor_id(); ++nmi_count(cpu); - if (!rcu_dereference(nmi_callback)(regs, cpu)) + if (!rcu_dereference_sched(nmi_callback)(regs, cpu)) default_do_nmi(regs); nmi_exit(); @@ -47,12 +47,13 @@ function pointer. If this handler retur default_do_nmi() function to handle a machine-specific NMI. Finally, preemption is restored. -Strictly speaking, rcu_dereference() is not needed, since this code runs -only on i386, which does not need rcu_dereference() anyway. However, -it is a good documentation aid, particularly for anyone attempting to -do something similar on Alpha. +In theory, rcu_dereference_sched() is not needed, since this code runs +only on i386, which in theory does not need rcu_dereference_sched() +anyway. However, in practice it is a good documentation aid, particularly +for anyone attempting to do something similar on Alpha or on systems +with aggressive optimizing compilers. -Quick Quiz: Why might the rcu_dereference() be necessary on Alpha, +Quick Quiz: Why might the rcu_dereference_sched() be necessary on Alpha, given that the code referenced by the pointer is read-only? @@ -99,17 +100,21 @@ invoke irq_enter() and irq_exit() on NMI Answer to Quick Quiz - Why might the rcu_dereference() be necessary on Alpha, given + Why might the rcu_dereference_sched() be necessary on Alpha, given that the code referenced by the pointer is read-only? Answer: The caller to set_nmi_callback() might well have - initialized some data that is to be used by the - new NMI handler. In this case, the rcu_dereference() - would be needed, because otherwise a CPU that received - an NMI just after the new handler was set might see - the pointer to the new NMI handler, but the old - pre-initialized version of the handler's data. - - More important, the rcu_dereference() makes it clear - to someone reading the code that the pointer is being - protected by RCU. + initialized some data that is to be used by the new NMI + handler. In this case, the rcu_dereference_sched() would + be needed, because otherwise a CPU that received an NMI + just after the new handler was set might see the pointer + to the new NMI handler, but the old pre-initialized + version of the handler's data. + + This same sad story can happen on other CPUs when using + a compiler with aggressive pointer-value speculation + optimizations. + + More important, the rcu_dereference_sched() makes it + clear to someone reading the code that the pointer is + being protected by RCU-sched. diff -urpNa -X dontdiff linux-2.6.34-rc1/Documentation/RCU/whatisRCU.txt linux-2.6.34-rc1-tip.007b0924-rcu/Documentation/RCU/whatisRCU.txt --- linux-2.6.34-rc1/Documentation/RCU/whatisRCU.txt 2010-03-15 12:45:49.000000000 -0700 +++ linux-2.6.34-rc1-tip.007b0924-rcu/Documentation/RCU/whatisRCU.txt 2010-04-08 10:01:07.000000000 -0700 @@ -840,6 +840,12 @@ SRCU: Initialization/cleanup init_srcu_struct cleanup_srcu_struct +All: lockdep-checked RCU-protected pointer access + + rcu_dereference_check + rcu_dereference_protected + rcu_access_pointer + See the comment headers in the source code (or the docbook generated from them) for more information. diff -urpNa -X dontdiff linux-2.6.34-rc1/include/linux/rcupdate.h linux-2.6.34-rc1-tip.007b0924-rcu/include/linux/rcupdate.h --- linux-2.6.34-rc1/include/linux/rcupdate.h 2010-03-15 12:46:13.000000000 -0700 +++ linux-2.6.34-rc1-tip.007b0924-rcu/include/linux/rcupdate.h 2010-04-08 10:01:08.000000000 -0700 @@ -97,6 +97,11 @@ extern struct lockdep_map rcu_sched_lock # define rcu_read_release_sched() \ lock_release(&rcu_sched_lock_map, 1, _THIS_IP_) +static inline int debug_lockdep_rcu_enabled(void) +{ + return likely(rcu_scheduler_active && debug_locks); +} + /** * rcu_read_lock_held - might we be in RCU read-side critical section? * @@ -104,28 +109,21 @@ extern struct lockdep_map rcu_sched_lock * an RCU read-side critical section. In absence of CONFIG_PROVE_LOCKING, * this assumes we are in an RCU read-side critical section unless it can * prove otherwise. + * + * Check rcu_scheduler_active to prevent false positives during boot. */ static inline int rcu_read_lock_held(void) { - if (debug_locks) - return lock_is_held(&rcu_lock_map); - return 1; + if (!debug_lockdep_rcu_enabled()) + return 1; + return lock_is_held(&rcu_lock_map); } -/** - * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? - * - * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in - * an RCU-bh read-side critical section. In absence of CONFIG_PROVE_LOCKING, - * this assumes we are in an RCU-bh read-side critical section unless it can - * prove otherwise. +/* + * rcu_read_lock_bh_held() is defined out of line to avoid #include-file + * hell. */ -static inline int rcu_read_lock_bh_held(void) -{ - if (debug_locks) - return lock_is_held(&rcu_bh_lock_map); - return 1; -} +extern int rcu_read_lock_bh_held(void); /** * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? @@ -135,15 +133,26 @@ static inline int rcu_read_lock_bh_held( * this assumes we are in an RCU-sched read-side critical section unless it * can prove otherwise. Note that disabling of preemption (including * disabling irqs) counts as an RCU-sched read-side critical section. + * + * Check rcu_scheduler_active to prevent false positives during boot. */ +#ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { int lockdep_opinion = 0; + if (!debug_lockdep_rcu_enabled()) + return 1; if (debug_locks) lockdep_opinion = lock_is_held(&rcu_sched_lock_map); - return lockdep_opinion || preempt_count() != 0 || !rcu_scheduler_active; + return lockdep_opinion || preempt_count() != 0 || irqs_disabled(); +} +#else /* #ifdef CONFIG_PREEMPT */ +static inline int rcu_read_lock_sched_held(void) +{ + return 1; } +#endif /* #else #ifdef CONFIG_PREEMPT */ #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -164,10 +173,17 @@ static inline int rcu_read_lock_bh_held( return 1; } +#ifdef CONFIG_PREEMPT +static inline int rcu_read_lock_sched_held(void) +{ + return !rcu_scheduler_active || preempt_count() != 0 || irqs_disabled(); +} +#else /* #ifdef CONFIG_PREEMPT */ static inline int rcu_read_lock_sched_held(void) { - return preempt_count() != 0 || !rcu_scheduler_active; + return 1; } +#endif /* #else #ifdef CONFIG_PREEMPT */ #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -175,23 +191,74 @@ static inline int rcu_read_lock_sched_he /** * rcu_dereference_check - rcu_dereference with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place * - * Do an rcu_dereference(), but check that the context is correct. - * For example, rcu_dereference_check(gp, rcu_read_lock_held()) to - * ensure that the rcu_dereference_check() executes within an RCU - * read-side critical section. It is also possible to check for - * locks being held, for example, by using lockdep_is_held(). + * Do an rcu_dereference(), but check that the conditions under which the + * dereference will take place are correct. Typically the conditions indicate + * the various locking conditions that should be held at that point. The check + * should return true if the conditions are satisfied. + * + * For example: + * + * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || + * lockdep_is_held(&foo->lock)); + * + * could be used to indicate to lockdep that foo->bar may only be dereferenced + * if either the RCU read lock is held, or that the lock required to replace + * the bar struct at foo->bar is held. + * + * Note that the list of conditions may also include indications of when a lock + * need not be held, for example during initialisation or destruction of the + * target struct: + * + * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || + * lockdep_is_held(&foo->lock) || + * atomic_read(&foo->usage) == 0); */ #define rcu_dereference_check(p, c) \ ({ \ - if (debug_locks && !(c)) \ + if (debug_lockdep_rcu_enabled() && !(c)) \ lockdep_rcu_dereference(__FILE__, __LINE__); \ rcu_dereference_raw(p); \ }) +/** + * rcu_access_pointer - fetch RCU pointer with no dereferencing + * + * Return the value of the specified RCU-protected pointer, but omit the + * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * when the value of this pointer is accessed, but the pointer is not + * dereferenced, for example, when testing an RCU-protected pointer against + * NULL. This may also be used in cases where update-side locks prevent + * the value of the pointer from changing, but rcu_dereference_protected() + * is a lighter-weight primitive for this use case. + */ +#define rcu_access_pointer(p) ((void *)ACCESS_ONCE(p)) + +/** + * rcu_dereference_protected - fetch RCU pointer when updates prevented + * + * Return the value of the specified RCU-protected pointer, but omit + * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This + * is useful in cases where update-side locks prevent the value of the + * pointer from changing. Please note that this primitive does -not- + * prevent the compiler from repeating this reference or combining it + * with other references, so it should not be used without protection + * of appropriate locks. + */ +#define rcu_dereference_protected(p, c) \ + ({ \ + if (debug_lockdep_rcu_enabled() && !(c)) \ + lockdep_rcu_dereference(__FILE__, __LINE__); \ + (p); \ + }) + #else /* #ifdef CONFIG_PROVE_RCU */ #define rcu_dereference_check(p, c) rcu_dereference_raw(p) +#define rcu_access_pointer(p) ((void *)ACCESS_ONCE(p)) +#define rcu_dereference_protected(p, c) (p) #endif /* #else #ifdef CONFIG_PROVE_RCU */ diff -urpNa -X dontdiff linux-2.6.34-rc1/include/linux/tracepoint.h linux-2.6.34-rc1-tip.007b0924-rcu/include/linux/tracepoint.h --- linux-2.6.34-rc1/include/linux/tracepoint.h 2010-02-24 10:52:17.000000000 -0800 +++ linux-2.6.34-rc1-tip.007b0924-rcu/include/linux/tracepoint.h 2010-03-27 22:47:09.000000000 -0700 @@ -49,7 +49,7 @@ struct tracepoint { void **it_func; \ \ rcu_read_lock_sched_notrace(); \ - it_func = rcu_dereference((tp)->funcs); \ + it_func = rcu_dereference_sched((tp)->funcs); \ if (it_func) { \ do { \ ((void(*)(proto))(*it_func))(args); \ diff -urpNa -X dontdiff linux-2.6.34-rc1/kernel/rcupdate.c linux-2.6.34-rc1-tip.007b0924-rcu/kernel/rcupdate.c --- linux-2.6.34-rc1/kernel/rcupdate.c 2010-03-15 12:46:14.000000000 -0700 +++ linux-2.6.34-rc1-tip.007b0924-rcu/kernel/rcupdate.c 2010-03-27 22:47:09.000000000 -0700 @@ -45,6 +45,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key rcu_lock_key; @@ -66,6 +67,28 @@ EXPORT_SYMBOL_GPL(rcu_sched_lock_map); int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +/** + * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? + * + * Check for bottom half being disabled, which covers both the + * CONFIG_PROVE_RCU and not cases. Note that if someone uses + * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) + * will show the situation. + * + * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. + */ +int rcu_read_lock_bh_held(void) +{ + if (!debug_lockdep_rcu_enabled()) + return 1; + return in_softirq(); +} +EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); + +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + /* * This function is invoked towards the end of the scheduler's initialization * process. Before this is called, the idle task might contain diff -urpNa -X dontdiff linux-2.6.34-rc1/kernel/rcutree.h linux-2.6.34-rc1-tip.007b0924-rcu/kernel/rcutree.h --- linux-2.6.34-rc1/kernel/rcutree.h 2010-03-15 12:46:14.000000000 -0700 +++ linux-2.6.34-rc1-tip.007b0924-rcu/kernel/rcutree.h 2010-03-15 13:26:00.000000000 -0700 @@ -246,12 +246,21 @@ struct rcu_data { #define RCU_JIFFIES_TILL_FORCE_QS 3 /* for rsp->jiffies_force_qs */ #ifdef CONFIG_RCU_CPU_STALL_DETECTOR -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ) /* for rsp->jiffies_stall */ -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ) /* for rsp->jiffies_stall */ -#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ - /* to take at least one */ - /* scheduling clock irq */ - /* before ratting on them. */ + +#ifdef CONFIG_PROVE_RCU +#define RCU_STALL_DELAY_DELTA (5 * HZ) +#else +#define RCU_STALL_DELAY_DELTA 0 +#endif + +#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA) + /* for rsp->jiffies_stall */ +#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA) + /* for rsp->jiffies_stall */ +#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ + /* to take at least one */ + /* scheduling clock irq */ + /* before ratting on them. */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ diff -urpNa -X dontdiff linux-2.6.34-rc1/kernel/rcutree_plugin.h linux-2.6.34-rc1-tip.007b0924-rcu/kernel/rcutree_plugin.h --- linux-2.6.34-rc1/kernel/rcutree_plugin.h 2010-03-15 12:46:14.000000000 -0700 +++ linux-2.6.34-rc1-tip.007b0924-rcu/kernel/rcutree_plugin.h 2010-03-15 13:25:49.000000000 -0700 @@ -1010,6 +1010,10 @@ int rcu_needs_cpu(int cpu) int c = 0; int thatcpu; + /* Check for being in the holdoff period. */ + if (per_cpu(rcu_dyntick_holdoff, cpu) == jiffies) + return rcu_needs_cpu_quick_check(cpu); + /* Don't bother unless we are the last non-dyntick-idle CPU. */ for_each_cpu_not(thatcpu, nohz_cpu_mask) if (thatcpu != cpu) { @@ -1041,10 +1045,8 @@ int rcu_needs_cpu(int cpu) } /* If RCU callbacks are still pending, RCU still needs this CPU. */ - if (c) { + if (c) raise_softirq(RCU_SOFTIRQ); - per_cpu(rcu_dyntick_holdoff, cpu) = jiffies; - } return c; } diff -urpNa -X dontdiff linux-2.6.34-rc1/mm/mempolicy.c linux-2.6.34-rc1-tip.007b0924-rcu/mm/mempolicy.c --- linux-2.6.34-rc1/mm/mempolicy.c 2010-03-15 12:46:14.000000000 -0700 +++ linux-2.6.34-rc1-tip.007b0924-rcu/mm/mempolicy.c 2010-03-18 12:10:45.000000000 -0700 @@ -1756,10 +1756,12 @@ struct mempolicy *__mpol_dup(struct memp if (!new) return ERR_PTR(-ENOMEM); + rcu_read_lock(); if (current_cpuset_is_being_rebound()) { nodemask_t mems = cpuset_mems_allowed(current); mpol_rebind_policy(old, &mems); } + rcu_read_unlock(); *new = *old; atomic_set(&new->refcnt, 1); return new;