diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 1423d25..44c6dcc 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -3,35 +3,79 @@ Using RCU's CPU Stall Detector The CONFIG_RCU_CPU_STALL_DETECTOR kernel config parameter enables RCU's CPU stall detector, which detects conditions that unduly delay RCU grace periods. The stall detector's idea of what constitutes -"unduly delayed" is controlled by a pair of C preprocessor macros: +"unduly delayed" is controlled by a set of C preprocessor macros: RCU_SECONDS_TILL_STALL_CHECK This macro defines the period of time that RCU will wait from the beginning of a grace period until it issues an RCU CPU - stall warning. It is normally ten seconds. + stall warning. This time period is normally ten seconds. RCU_SECONDS_TILL_STALL_RECHECK This macro defines the period of time that RCU will wait after - issuing a stall warning until it issues another stall warning. - It is normally set to thirty seconds. + issuing a stall warning until it issues another stall warning + for the same stall. This time period is normally set to thirty + seconds. RCU_STALL_RAT_DELAY - The CPU stall detector tries to make the offending CPU rat on itself, - as this often gives better-quality stack traces. However, if - the offending CPU does not detect its own stall in the number - of jiffies specified by RCU_STALL_RAT_DELAY, then other CPUs will - complain. This is normally set to two jiffies. + The CPU stall detector tries to make the offending CPU print its + own warnings, as this often gives better-quality stack traces. + However, if the offending CPU does not detect its own stall in + the number of jiffies specified by RCU_STALL_RAT_DELAY, then + some other CPU will complain. This delay is normally set to + two jiffies. 
-The following problems can result in an RCU CPU stall warning: +When a CPU detects that it is stalling, it will print a message similar +to the following: + +INFO: rcu_sched_state detected stall on CPU 5 (t=2500 jiffies) + +This message indicates that CPU 5 detected that it was causing a stall, +and that the stall was affecting RCU-sched. This message will normally be +followed by a stack dump of the offending CPU. On TREE_RCU kernel builds, +RCU and RCU-sched are implemented by the same underlying mechanism, +while on TREE_PREEMPT_RCU kernel builds, RCU is instead implemented +by rcu_preempt_state. + +On the other hand, if the offending CPU fails to print out a stall-warning +message quickly enough, some other CPU will print a message similar to +the following: + +INFO: rcu_bh_state detected stalls on CPUs/tasks: { 3 5 } (detected by 2, 2502 jiffies) + +This message indicates that CPU 2 detected that CPUs 3 and 5 were both +causing stalls, and that the stall was affecting RCU-bh. This message +will normally be followed by stack dumps for each CPU. Please note that +TREE_PREEMPT_RCU builds can be stalled by tasks as well as by CPUs, +and that the tasks will be indicated by PID, for example, "P3421". +It is even possible for a rcu_preempt_state stall to be caused by both +CPUs -and- tasks, in which case the offending CPUs and tasks will all +be called out in the list. + +Finally, if the grace period ends just as the stall warning starts +printing, there will be a spurious stall-warning message: + +INFO: rcu_bh_state detected stalls on CPUs/tasks: { } (detected by 4, 2502 jiffies) + +This is rare, but does happen from time to time in real life. + +So your kernel printed an RCU CPU stall warning. The next question is +"What caused it?" The following problems can result in RCU CPU stall +warnings: o A CPU looping in an RCU read-side critical section. -o A CPU looping with interrupts disabled. +o A CPU looping with interrupts disabled. 
This condition can + result in RCU-sched and RCU-bh stalls. -o A CPU looping with preemption disabled. +o A CPU looping with preemption disabled. This condition can + result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh + stalls. + +o A CPU looping with bottom halves disabled. This condition can + result in RCU-sched and RCU-bh stalls. o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel without invoking schedule(). @@ -39,20 +83,24 @@ o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel o A bug in the RCU implementation. o A hardware failure. This is quite unlikely, but has occurred - at least once in a former life. A CPU failed in a running system, + at least once in real life. A CPU failed in a running system, becoming unresponsive, but not causing an immediate crash. This resulted in a series of RCU CPU stall warnings, eventually leading the realization that the CPU had failed. -The RCU, RCU-sched, and RCU-bh implementations have CPU stall warning. -SRCU does not do so directly, but its calls to synchronize_sched() will -result in RCU-sched detecting any CPU stalls that might be occurring. - -To diagnose the cause of the stall, inspect the stack traces. The offending -function will usually be near the top of the stack. If you have a series -of stall warnings from a single extended stall, comparing the stack traces -can often help determine where the stall is occurring, which will usually -be in the function nearest the top of the stack that stays the same from -trace to trace. +The RCU, RCU-sched, and RCU-bh implementations have CPU stall +warnings. SRCU does not have its own CPU stall warnings, but its +calls to synchronize_sched() will result in RCU-sched detecting +RCU-sched-related CPU stalls. Please note that RCU only detects +CPU stalls when there is a grace period in progress. No grace period, +no CPU stall warnings. + +To diagnose the cause of the stall, inspect the stack traces. 
+The offending function will usually be near the top of the stack. +If you have a series of stall warnings from a single extended stall, +comparing the stack traces can often help determine where the stall +is occurring, which will usually be in the function nearest the top of +that portion of the stack which remains the same from trace to trace. +If you can reliably trigger the stall, ftrace can be quite helpful. RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE. diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 8608fd8..efd8cc9 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -256,23 +256,23 @@ o Each element of the form "1/1 0:127 ^0" represents one struct The output of "cat rcu/rcu_pending" looks as follows: rcu_sched: - 0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741 - 1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792 - 2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629 - 3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723 - 4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110 - 5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456 - 6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834 - 7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888 + 0 np=255892 qsp=53936 rpq=85 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741 + 1 np=261224 qsp=54638 rpq=33 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792 + 2 np=237496 qsp=49664 rpq=23 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629 + 3 np=236249 qsp=48766 rpq=98 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723 + 4 np=221310 qsp=46850 rpq=7 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110 + 5 np=237332 qsp=48449 rpq=9 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456 + 6 np=219995 qsp=46718 rpq=12 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834 + 7 np=249893 qsp=49390 
rpq=42 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888 rcu_bh: - 0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314 - 1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180 - 2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936 - 3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863 - 4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671 - 5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235 - 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 - 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 + 0 np=146741 qsp=1419 rpq=6 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314 + 1 np=155792 qsp=12597 rpq=3 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180 + 2 np=136629 qsp=18680 rpq=1 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936 + 3 np=137723 qsp=2843 rpq=0 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863 + 4 np=123110 qsp=12433 rpq=0 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671 + 5 np=137456 qsp=4210 rpq=1 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235 + 6 np=120834 qsp=9902 rpq=2 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 + 7 np=144888 qsp=26336 rpq=0 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 As always, this is once again split into "rcu_sched" and "rcu_bh" portions, with CONFIG_TREE_PREEMPT_RCU kernels having an additional @@ -284,6 +284,9 @@ o "np" is the number of times that __rcu_pending() has been invoked o "qsp" is the number of times that the RCU was waiting for a quiescent state from this CPU. +o "rpq" is the number of times that the CPU had passed through + a quiescent state, but not yet reported it to RCU. + o "cbr" is the number of times that this CPU had RCU callbacks that had passed through a grace period, and were thus ready to be invoked. 
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index ebc2f38..2c7e801 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -92,7 +92,6 @@ static void pte_free_rcu_callback(struct rcu_head *head) static void pte_free_submit(struct pte_freelist_batch *batch) { - INIT_RCU_HEAD(&batch->rcu); call_rcu(&batch->rcu, pte_free_rcu_callback); } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 06d9e79..9c65faf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -384,7 +384,7 @@ struct kvm_mem_aliases { }; struct kvm_arch { - struct kvm_mem_aliases *aliases; + struct kvm_mem_aliases __rcu *aliases; unsigned int n_free_mmu_pages; unsigned int n_requested_mmu_pages; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8a6f0af..f00a9f2 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -50,7 +50,7 @@ static DEFINE_MUTEX(mce_read_mutex); #define rcu_dereference_check_mce(p) \ - rcu_dereference_check((p), \ + rcu_dereference_index_check((p), \ rcu_read_lock_sched_held() || \ lockdep_is_held(&mce_read_mutex)) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 5f127cf..702d006 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -3743,7 +3743,6 @@ static void *cfq_init_queue(struct request_queue *q) * second, in order to have larger depth for async operations. 
*/ cfqd->last_delayed_sync = jiffies - HZ; - INIT_RCU_HEAD(&cfqd->rcu); return cfqd; } diff --git a/block/genhd.c b/block/genhd.c index d13ba76..27e85a2 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -987,7 +987,6 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno) if (!new_ptbl) return -ENOMEM; - INIT_RCU_HEAD(&new_ptbl->rcu_head); new_ptbl->len = target; for (i = 0; i < len; i++) diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index 2ee6c7a..73b1208 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -28,7 +28,7 @@ struct evdev { int minor; struct input_handle handle; wait_queue_head_t wait; - struct evdev_client *grab; + struct evdev_client __rcu *grab; struct list_head client_list; spinlock_t client_lock; /* protects client_list */ struct mutex mutex; diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h index cd4b0e4..7bdb1cb 100644 --- a/drivers/net/bnx2.h +++ b/drivers/net/bnx2.h @@ -6746,7 +6746,7 @@ struct bnx2 { u32 tx_wake_thresh; #ifdef BCM_CNIC - struct cnic_ops *cnic_ops; + struct cnic_ops __rcu *cnic_ops; void *cnic_data; #endif diff --git a/drivers/net/bnx2x.h b/drivers/net/bnx2x.h index 3c48a7a..9dfb57b 100644 --- a/drivers/net/bnx2x.h +++ b/drivers/net/bnx2x.h @@ -1007,7 +1007,7 @@ struct bnx2x { dma_addr_t timers_mapping; void *qm; dma_addr_t qm_mapping; - struct cnic_ops *cnic_ops; + struct cnic_ops __rcu *cnic_ops; void *cnic_data; u32 cnic_tag; struct cnic_eth_dev cnic_eth_dev; diff --git a/drivers/net/cnic.h b/drivers/net/cnic.h index a0d853d..9852375 100644 --- a/drivers/net/cnic.h +++ b/drivers/net/cnic.h @@ -177,7 +177,7 @@ struct cnic_local { #define ULP_F_INIT 0 #define ULP_F_START 1 #define ULP_F_CALL_PENDING 2 - struct cnic_ulp_ops *ulp_ops[MAX_CNIC_ULP_TYPE]; + struct cnic_ulp_ops __rcu *ulp_ops[MAX_CNIC_ULP_TYPE]; /* protected by ulp_lock */ u32 cnic_local_flags; diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index abba3cc..adf0145 100644 --- a/drivers/net/macvtap.c +++ 
b/drivers/net/macvtap.c @@ -37,7 +37,7 @@ struct macvtap_queue { struct sock sk; struct socket sock; - struct macvlan_dev *vlan; + struct macvlan_dev __rcu *vlan; struct file *file; unsigned int flags; }; diff --git a/drivers/staging/batman-adv/hard-interface.c b/drivers/staging/batman-adv/hard-interface.c index befd488..96ea0e5 100644 --- a/drivers/staging/batman-adv/hard-interface.c +++ b/drivers/staging/batman-adv/hard-interface.c @@ -301,7 +301,6 @@ int hardif_add_interface(char *dev, int if_num) batman_if->if_num = if_num; batman_if->dev = dev; batman_if->if_active = IF_INACTIVE; - INIT_RCU_HEAD(&batman_if->rcu); printk(KERN_INFO "batman-adv:Adding interface: %s\n", dev); avail_ifs++; diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 9777583..945c5cb 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -364,7 +364,10 @@ static void vhost_net_disable_vq(struct vhost_net *n, static void vhost_net_enable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { - struct socket *sock = vq->private_data; + struct socket *sock; + + sock = rcu_dereference_protected(vq->private_data, + lockdep_is_held(&vq->mutex)); if (!sock) return; if (vq == n->vqs + VHOST_NET_VQ_TX) { @@ -380,7 +383,8 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n, struct socket *sock; mutex_lock(&vq->mutex); - sock = vq->private_data; + sock = rcu_dereference_protected(vq->private_data, + lockdep_is_held(&vq->mutex)); vhost_net_disable_vq(n, vq); rcu_assign_pointer(vq->private_data, NULL); mutex_unlock(&vq->mutex); @@ -518,7 +522,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) } /* start polling new socket */ - oldsock = vq->private_data; + oldsock = rcu_dereference_protected(vq->private_data, + lockdep_is_held(&vq->mutex)); if (sock == oldsock) goto done; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index e69d238..fc0c077 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -180,7 +180,7 @@ long 
vhost_dev_reset_owner(struct vhost_dev *dev) vhost_dev_cleanup(dev); memory->nregions = 0; - dev->memory = memory; + RCU_INIT_POINTER(dev->memory, memory); return 0; } @@ -212,8 +212,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev) fput(dev->log_file); dev->log_file = NULL; /* No one will access memory at this point */ - kfree(dev->memory); - dev->memory = NULL; + kfree(rcu_dereference_protected(dev->memory, + lockdep_is_held(&dev->mutex))); + RCU_INIT_POINTER(dev->memory, NULL); if (dev->mm) mmput(dev->mm); dev->mm = NULL; @@ -294,14 +295,14 @@ static int vq_access_ok(unsigned int num, /* Caller should have device mutex but not vq mutex */ int vhost_log_access_ok(struct vhost_dev *dev) { - return memory_access_ok(dev, dev->memory, 1); + return memory_access_ok(dev, rcu_dereference_protected(dev->memory, lockdep_is_held(&dev->mutex)), 1); } /* Verify access for write logging. */ /* Caller should have vq mutex and device mutex */ static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) { - return vq_memory_access_ok(log_base, vq->dev->memory, + return vq_memory_access_ok(log_base, rcu_dereference_protected(vq->dev->memory, lockdep_is_held(&vq->dev->mutex)), vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && (!vq->log_used || log_access_ok(log_base, vq->log_addr, sizeof *vq->used + @@ -342,7 +343,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) if (!memory_access_ok(d, newmem, vhost_has_feature(d, VHOST_F_LOG_ALL))) return -EFAULT; - oldmem = d->memory; + oldmem = rcu_dereference_protected(d->memory, + lockdep_is_held(&d->mutex)); rcu_assign_pointer(d->memory, newmem); synchronize_rcu(); kfree(oldmem); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 44591ba..240396c 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -92,7 +92,7 @@ struct vhost_virtqueue { * work item execution acts instead of rcu_read_lock() and the end of * work item execution acts instead of rcu_read_lock(). 
* Writers use virtqueue mutex. */ - void *private_data; + void __rcu *private_data; /* Log write descriptors */ void __user *log_base; struct vhost_log log[VHOST_NET_MAX_SG]; @@ -102,7 +102,7 @@ struct vhost_dev { /* Readers use RCU to access memory table pointer * log base pointer and features. * Writers use mutex below.*/ - struct vhost_memory *memory; + struct vhost_memory __rcu *memory; struct mm_struct *mm; struct mutex mutex; unsigned acked_features; diff --git a/fs/file.c b/fs/file.c index 34bb7f7..cccaead 100644 --- a/fs/file.c +++ b/fs/file.c @@ -178,7 +178,6 @@ static struct fdtable * alloc_fdtable(unsigned int nr) fdt->open_fds = (fd_set *)data; data += nr / BITS_PER_BYTE; fdt->close_on_exec = (fd_set *)data; - INIT_RCU_HEAD(&fdt->rcu); fdt->next = NULL; return fdt; @@ -312,7 +311,6 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; new_fdt->open_fds = (fd_set *)&newf->open_fds_init; new_fdt->fd = &newf->fd_array[0]; - INIT_RCU_HEAD(&new_fdt->rcu); new_fdt->next = NULL; spin_lock(&oldf->file_lock); @@ -430,7 +428,6 @@ struct files_struct init_files = { .fd = &init_files.fd_array[0], .close_on_exec = (fd_set *)&init_files.close_on_exec_init, .open_fds = (fd_set *)&init_files.open_fds_init, - .rcu = RCU_HEAD_INIT, }, .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), }; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 4b37f7c..ea38c65 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -75,12 +75,33 @@ static inline bool bdi_work_on_stack(struct bdi_work *work) return test_bit(WS_ONSTACK_B, &work->state); } -static inline void bdi_work_init(struct bdi_work *work, - struct wb_writeback_args *args) +static inline void __bdi_work_init(struct bdi_work *work, + struct wb_writeback_args *args, + int on_stack) { - INIT_RCU_HEAD(&work->rcu_head); work->args = *args; work->state = WS_USED; + if (on_stack) { + work->state |= WS_ONSTACK; + 
init_rcu_head_on_stack(&work->rcu_head); + } +} + +static inline void bdi_work_init(struct bdi_work *work, + struct wb_writeback_args *args) +{ + __bdi_work_init(work, args, false); +} + +static inline void bdi_work_init_on_stack(struct bdi_work *work, + struct wb_writeback_args *args) +{ + __bdi_work_init(work, args, true); +} + +static inline void bdi_destroy_work_on_stack(struct bdi_work *work) +{ + destroy_rcu_head_on_stack(&work->rcu_head); } /** @@ -233,11 +254,11 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi, }; struct bdi_work work; - bdi_work_init(&work, &args); - work.state |= WS_ONSTACK; + bdi_work_init_on_stack(&work, &args); bdi_queue_work(bdi, &work); bdi_wait_on_work_clear(&work); + bdi_destroy_work_on_stack(&work); } /** diff --git a/fs/partitions/check.c b/fs/partitions/check.c index e238ab2..7444e6f 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -456,7 +456,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, } /* everything is up and running, commence */ - INIT_RCU_HEAD(&p->rcu_head); rcu_assign_pointer(ptbl->part[partno], p); /* suppress uevent if the disk supresses it */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 8f78073..b147fd5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -75,7 +75,7 @@ struct cgroup_subsys_state { unsigned long flags; /* ID for this css, if possible */ - struct css_id *id; + struct css_id __rcu *id; }; /* bits in struct cgroup_subsys_state flags field */ @@ -205,7 +205,7 @@ struct cgroup { struct list_head children; /* my children */ struct cgroup *parent; /* my parent */ - struct dentry *dentry; /* cgroup fs entry, RCU protected */ + struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/include/linux/compiler.h b/include/linux/compiler.h index a5a472b..320d6c9 100644 --- 
a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -16,6 +16,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +#ifdef CONFIG_SPARSE_RCU_POINTER +# define __rcu __attribute__((noderef, address_space(4))) +#else +# define __rcu +#endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else @@ -34,6 +39,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); # define __release(x) (void)0 # define __cond_lock(x,c) (c) # define __percpu +# define __rcu #endif #ifdef __KERNEL__ diff --git a/include/linux/cred.h b/include/linux/cred.h index 52507c3..413f98a 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,7 +84,7 @@ struct thread_group_cred { atomic_t usage; pid_t tgid; /* thread group process ID */ spinlock_t lock; - struct key *session_keyring; /* keyring inherited over fork */ + struct key __rcu *session_keyring; /* keyring inherited over fork */ struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h index 8c243aa..597692f 100644 --- a/include/linux/debugobjects.h +++ b/include/linux/debugobjects.h @@ -20,12 +20,14 @@ struct debug_obj_descr; * struct debug_obj - representaion of an tracked object * @node: hlist node to link the object into the tracker list * @state: tracked object state + * @astate: current active state * @object: pointer to the real object * @descr: pointer to an object type specific debug description structure */ struct debug_obj { struct hlist_node node; enum debug_obj_state state; + unsigned int astate; void *object; struct debug_obj_descr *descr; }; @@ -60,6 +62,15 @@ extern void debug_object_deactivate(void *addr, struct debug_obj_descr *descr); extern void debug_object_destroy (void *addr, struct 
debug_obj_descr *descr); extern void debug_object_free (void *addr, struct debug_obj_descr *descr); +/* + * Active state: + * - Set at 0 upon initialization. + * - Must return to 0 before deactivation. + */ +extern void +debug_object_active_state(void *addr, struct debug_obj_descr *descr, + unsigned int expect, unsigned int next); + extern void debug_objects_early_init(void); extern void debug_objects_mem_init(void); #else diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 013dc52..3e4c4f4 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -11,6 +11,7 @@ #include #include #include +#include #include @@ -30,7 +31,7 @@ struct embedded_fd_set { struct fdtable { unsigned int max_fds; - struct file ** fd; /* current fd array */ + struct file __rcu **fd; /* current fd array */ fd_set *close_on_exec; fd_set *open_fds; struct rcu_head rcu; @@ -45,7 +46,7 @@ struct files_struct { * read mostly part */ atomic_t count; - struct fdtable *fdt; + struct fdtable __rcu *fdt; struct fdtable fdtab; /* * written part on a separate cache line in SMP @@ -54,7 +55,7 @@ struct files_struct { int next_fd; struct embedded_fd_set close_on_exec_init; struct embedded_fd_set open_fds_init; - struct file * fd_array[NR_OPEN_DEFAULT]; + struct file __rcu * fd_array[NR_OPEN_DEFAULT]; }; #define rcu_dereference_check_fdtable(files, fdtfd) \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 44f35ae..752bc76 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1382,7 +1382,7 @@ struct super_block { * Saved mount options for lazy filesystems using * generic_show_options() */ - char *s_options; + char __rcu *s_options; }; extern struct timespec current_fs_time(struct super_block *sb); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5f2f4c4..af3f06b 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -129,8 +129,8 @@ struct blk_scsi_cmd_filter { struct disk_part_tbl { struct rcu_head rcu_head; int len; - struct 
hd_struct *last_lookup; - struct hd_struct *part[]; + struct hd_struct __rcu *last_lookup; + struct hd_struct __rcu *part[]; }; struct gendisk { @@ -149,7 +149,7 @@ struct gendisk { * non-critical accesses use RCU. Always access through * helpers. */ - struct disk_part_tbl *part_tbl; + struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0; const struct block_device_operations *fops; diff --git a/include/linux/idr.h b/include/linux/idr.h index e968db7..cdb715e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -50,14 +50,14 @@ struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ - struct idr_layer *ary[1< +/* br_handle_frame_hook() needs the following forward declaration. */ +struct net_bridge_port; + extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff *skb); diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index b78a712..c15ed77 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -47,7 +47,7 @@ struct macvlan_dev { enum macvlan_mode mode; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); - struct macvtap_queue *tap; + struct macvtap_queue __rcu *tap; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 93fc244..39dd315 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -167,10 +167,10 @@ struct ip_sf_socklist { */ struct ip_mc_socklist { - struct ip_mc_socklist *next; + struct ip_mc_socklist __rcu *next; struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ - struct ip_sf_socklist *sflist; + struct ip_sf_socklist __rcu *sflist; struct rcu_head rcu; }; diff --git a/include/linux/init_task.h b/include/linux/init_task.h index b1ed1cd..f05af8c 100644 --- a/include/linux/init_task.h +++ 
b/include/linux/init_task.h @@ -49,7 +49,6 @@ extern struct group_info init_groups; { .first = &init_task.pids[PIDTYPE_PGID].node }, \ { .first = &init_task.pids[PIDTYPE_SID].node }, \ }, \ - .rcu = RCU_HEAD_INIT, \ .level = 0, \ .numbers = { { \ .nr = 0, \ @@ -138,8 +137,8 @@ extern struct cred init_cred; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .real_cred = &init_cred, \ - .cred = &init_cred, \ + RCU_INIT_POINTER(.real_cred, &init_cred), \ + RCU_INIT_POINTER(.cred, &init_cred), \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(tsk.cred_guard_mutex), \ .comm = "swapper", \ diff --git a/include/linux/input.h b/include/linux/input.h index 7ed2251..850b6b7 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -1173,7 +1173,7 @@ struct input_dev { int (*flush)(struct input_dev *dev, struct file *file); int (*event)(struct input_dev *dev, unsigned int type, unsigned int code, int value); - struct input_handle *grab; + struct input_handle __rcu *grab; spinlock_t event_lock; struct mutex mutex; diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index a0bb301..6d4cd79 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -54,7 +54,7 @@ struct io_context { struct radix_tree_root radix_root; struct hlist_head cic_list; - void *ioc_data; + void __rcu *ioc_data; }; static inline struct io_context *ioc_task_link(struct io_context *ioc) diff --git a/include/linux/key.h b/include/linux/key.h index cd50dfa..3db0adc 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -178,8 +178,9 @@ struct key { */ union { unsigned long value; + void __rcu *rcudata; void *data; - struct keyring_list *subscriptions; + struct keyring_list __rcu *subscriptions; } payload; }; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 169d077..282b041 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -169,7 +169,7 @@ struct kvm { raw_spinlock_t 
requests_lock; struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ - struct kvm_memslots *memslots; + struct kvm_memslots __rcu *memslots; struct srcu_struct srcu; #ifdef CONFIG_KVM_APIC_ARCHITECTURE u32 bsp_vcpu_id; @@ -179,7 +179,7 @@ struct kvm { atomic_t online_vcpus; struct list_head vm_list; struct mutex lock; - struct kvm_io_bus *buses[KVM_NR_BUSES]; + struct kvm_io_bus __rcu *buses[KVM_NR_BUSES]; #ifdef CONFIG_HAVE_KVM_EVENTFD struct { spinlock_t lock; @@ -197,7 +197,7 @@ struct kvm { struct mutex irq_lock; #ifdef CONFIG_HAVE_KVM_IRQCHIP - struct kvm_irq_routing_table *irq_routing; + struct kvm_irq_routing_table __rcu *irq_routing; struct hlist_head mask_notifier_list; struct hlist_head irq_ack_notifier_list; #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b8bb9a6..05537a5 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -299,7 +299,7 @@ struct mm_struct { * new_owner->mm == mm * new_owner->alloc_lock is held */ - struct task_struct *owner; + struct task_struct __rcu *owner; #endif #ifdef CONFIG_PROC_FS diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fa8b476..425be97 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -855,10 +855,10 @@ struct net_device { #ifdef CONFIG_NET_DSA void *dsa_ptr; /* dsa specific data */ #endif - void *atalk_ptr; /* AppleTalk link */ - void *ip_ptr; /* IPv4 specific data */ + void *atalk_ptr; /* AppleTalk link */ + void __rcu *ip_ptr; /* IPv4 specific data */ void *dn_ptr; /* DECnet specific data */ - void *ip6_ptr; /* IPv6 specific data */ + void __rcu *ip6_ptr; /* IPv6 specific data */ void *ec_ptr; /* Econet specific data */ void *ax25_ptr; /* AX.25 specific data */ struct wireless_dev *ieee80211_ptr; /* IEEE 802.11 specific data, @@ -947,11 +947,11 @@ struct net_device { void *ml_priv; /* bridge stuff */ - struct net_bridge_port *br_port; + void __rcu *br_port; /* macvlan */ - struct macvlan_port 
*macvlan_port; + struct macvlan_port __rcu *macvlan_port; /* GARP */ - struct garp_port *garp_port; + struct garp_port __rcu *garp_port; /* class/net/name entry */ struct device dev; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 07ce460..491da02 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -178,7 +178,7 @@ struct nfs_inode { struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ struct list_head open_states; - struct nfs_delegation *delegation; + struct nfs_delegation __rcu *delegation; fmode_t delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ diff --git a/include/linux/notifier.h b/include/linux/notifier.h index fee6c2f..f05f5e4 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -49,28 +49,28 @@ struct notifier_block { int (*notifier_call)(struct notifier_block *, unsigned long, void *); - struct notifier_block *next; + struct notifier_block __rcu *next; int priority; }; struct atomic_notifier_head { spinlock_t lock; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct blocking_notifier_head { struct rw_semaphore rwsem; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct raw_notifier_head { - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct srcu_notifier_head { struct mutex mutex; struct srcu_struct srcu; - struct notifier_block *head; + struct notifier_block __rcu *head; }; #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c8e3754..48132eb 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -582,7 +582,7 @@ struct perf_event { int nr_siblings; int group_flags; struct perf_event *group_leader; - struct perf_event *output; + struct perf_event __rcu *output; const struct pmu *pmu; enum perf_event_active_state state; @@ -643,7 +643,7 @@ struct perf_event { /* mmap bits */ struct mutex mmap_mutex; atomic_t 
mmap_count; - struct perf_mmap_data *data; + struct perf_mmap_data __rcu *data; /* poll related */ wait_queue_head_t waitq; @@ -710,7 +710,7 @@ struct perf_event_context { * These fields let us detect when two contexts have both * been cloned (inherited) from a common ancestor. */ - struct perf_event_context *parent_ctx; + struct perf_event_context __rcu *parent_ctx; u64 parent_gen; u64 generation; int pin_count; diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 55ca73c..d801044 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -47,6 +47,8 @@ static inline void *radix_tree_indirect_to_ptr(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } +#define radix_tree_indirect_to_ptr(ptr) \ + radix_tree_indirect_to_ptr((void __force *)(ptr)) static inline int radix_tree_is_indirect_ptr(void *ptr) { @@ -61,7 +63,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) struct radix_tree_root { unsigned int height; gfp_t gfp_mask; - struct radix_tree_node *rnode; + struct radix_tree_node __rcu *rnode; }; #define RADIX_TREE_INIT(mask) { \ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 2c9b46c..891c52a 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -10,6 +10,12 @@ #include /* + * return the ->next pointer of a list_head in an rcu safe + * way, we must not access it directly + */ +#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) + +/* * Insert a new entry between two known consecutive entries. 
* * This is only for internal list manipulation where we know @@ -20,7 +26,7 @@ static inline void __list_add_rcu(struct list_head *new, { new->next = next; new->prev = prev; - rcu_assign_pointer(prev->next, new); + rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } @@ -138,7 +144,7 @@ static inline void list_replace_rcu(struct list_head *old, { new->next = old->next; new->prev = old->prev; - rcu_assign_pointer(new->prev->next, new); + rcu_assign_pointer(list_next_rcu(new->prev), new); new->next->prev = new; old->prev = LIST_POISON2; } @@ -193,7 +199,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ last->next = at; - rcu_assign_pointer(head->next, first); + rcu_assign_pointer(list_next_rcu(head), first); first->prev = head; at->prev = last; } @@ -208,7 +214,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ - container_of(rcu_dereference_raw(ptr), type, member) + ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \ + container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ + }) /** * list_first_entry_rcu - get the first element from a list @@ -225,9 +233,9 @@ static inline void list_splice_init_rcu(struct list_head *list, list_entry_rcu((ptr)->next, type, member) #define __list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference_raw((head)->next); \ + for (pos = rcu_dereference_raw(list_next_rcu(head)); \ pos != (head); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(list_next_rcu(pos))) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -257,9 +265,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock().
*/ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference_raw((pos)->next); \ + for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference_raw((pos)->next)) + (pos) = rcu_dereference_raw(list_next_rcu(pos))) /** * list_for_each_entry_continue_rcu - continue iteration over list of given type @@ -314,12 +322,19 @@ static inline void hlist_replace_rcu(struct hlist_node *old, new->next = next; new->pprev = old->pprev; - rcu_assign_pointer(*new->pprev, new); + rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); if (next) new->next->pprev = &new->next; old->pprev = LIST_POISON2; } +/* + * return the first or the next element in an RCU protected hlist + */ +#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) +#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) +#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) + /** * hlist_add_head_rcu * @n: the element to add to the hash list. 
@@ -346,7 +361,7 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_first_rcu(h), n); if (first) first->pprev = &n->next; } @@ -374,7 +389,7 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, { n->pprev = next->pprev; n->next = next; - rcu_assign_pointer(*(n->pprev), n); + rcu_assign_pointer(hlist_pprev_rcu(n), n); next->pprev = &n->next; } @@ -401,15 +416,15 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, { n->next = prev->next; n->pprev = &prev->next; - rcu_assign_pointer(prev->next, n); + rcu_assign_pointer(hlist_next_rcu(prev), n); if (n->next) n->next->pprev = &n->next; } -#define __hlist_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ prefetch(pos->next); 1; }); \ - pos = rcu_dereference(pos->next)) +#define __hlist_for_each_rcu(pos, head) \ + for (pos = rcu_dereference(hlist_first_rcu(head)); \ + pos && ({ prefetch(pos->next); 1; }); \ + pos = rcu_dereference(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu - iterate over rcu list of given type @@ -422,11 +437,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). 
*/ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ pos && ({ prefetch(pos->next); 1; }) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_next_rcu(pos))) #endif /* __KERNEL__ */ #endif diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index b70ffe5..2ae1371 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -37,6 +37,12 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) } } +#define hlist_nulls_first_rcu(head) \ + (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) + +#define hlist_nulls_next_rcu(node) \ + (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) + /** * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. @@ -88,7 +94,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) first->pprev = &n->next; } @@ -100,11 +106,11 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, * @member: the name of the hlist_nulls_node within the struct. 
* */ -#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ - (!is_a_nulls(pos)) && \ +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) #endif #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index db266bb..0ce0f05 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -40,6 +40,8 @@ #include #include #include +#include +#include #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; /* for sysctl */ @@ -56,8 +58,6 @@ struct rcu_head { }; /* Exported common interfaces */ -extern void synchronize_rcu_bh(void); -extern void synchronize_sched(void); extern void rcu_barrier(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); @@ -66,8 +66,6 @@ extern int sched_expedited_torture_stats(char *page); /* Internal to kernel */ extern void rcu_init(void); -extern int rcu_scheduler_active; -extern void rcu_scheduler_starting(void); #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include @@ -77,11 +75,24 @@ extern void rcu_scheduler_starting(void); #error "Unknown RCU implementation specified to kernel configuration" #endif -#define RCU_HEAD_INIT { .next = NULL, .func = NULL } -#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT -#define INIT_RCU_HEAD(ptr) do { \ - (ptr)->next = NULL; (ptr)->func = NULL; \ -} while (0) +/* + * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic + * initialization and destruction of rcu_head on the stack. rcu_head structures + * allocated dynamically in the heap or defined statically don't need any + * initialization. 
+ */ +#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD +extern void init_rcu_head_on_stack(struct rcu_head *head); +extern void destroy_rcu_head_on_stack(struct rcu_head *head); +#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +static inline void init_rcu_head_on_stack(struct rcu_head *head) +{ +} + +static inline void destroy_rcu_head_on_stack(struct rcu_head *head) +{ +} +#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -104,14 +115,16 @@ extern struct lockdep_map rcu_sched_lock_map; extern int debug_lockdep_rcu_enabled(void); /** - * rcu_read_lock_held - might we be in RCU read-side critical section? + * rcu_read_lock_held() - might we be in RCU read-side critical section? * - * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in - * an RCU read-side critical section. In absence of CONFIG_PROVE_LOCKING, + * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU + * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, * this assumes we are in an RCU read-side critical section unless it can - * prove otherwise. + * prove otherwise. This is useful for debug checks in functions that + * require that they be called within an RCU read-side critical section. * - * Check rcu_scheduler_active to prevent false positives during boot. + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot + * and while lockdep is disabled. */ static inline int rcu_read_lock_held(void) { @@ -127,15 +140,19 @@ static inline int rcu_read_lock_held(void) extern int rcu_read_lock_bh_held(void); /** - * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? - * - * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in an - * RCU-sched read-side critical section. In absence of CONFIG_PROVE_LOCKING, - * this assumes we are in an RCU-sched read-side critical section unless it - * can prove otherwise. 
Note that disabling of preemption (including - * disabling irqs) counts as an RCU-sched read-side critical section. - * - * Check rcu_scheduler_active to prevent false positives during boot. + * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? + * + * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an + * RCU-sched read-side critical section. In absence of + * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side + * critical section unless it can prove otherwise. Note that disabling + * of preemption (including disabling irqs) counts as an RCU-sched + * read-side critical section. This is useful for debug checks in functions + * that require that they be called within an RCU-sched read-side + * critical section. + * + * Check debug_lockdep_rcu_enabled() to prevent false positives during boot + * and while lockdep is disabled. */ #ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) @@ -177,7 +194,7 @@ static inline int rcu_read_lock_bh_held(void) #ifdef CONFIG_PREEMPT static inline int rcu_read_lock_sched_held(void) { - return !rcu_scheduler_active || preempt_count() != 0 || irqs_disabled(); + return preempt_count() != 0 || irqs_disabled(); } #else /* #ifdef CONFIG_PREEMPT */ static inline int rcu_read_lock_sched_held(void) @@ -192,42 +209,164 @@ static inline int rcu_read_lock_sched_held(void) extern int rcu_my_thread_group_empty(void); +#define __do_rcu_dereference_check(c) \ + do { \ + static bool __warned; \ + if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ + __warned = true; \ + lockdep_rcu_dereference(__FILE__, __LINE__); \ + } \ + } while (0) + +#else /* #ifdef CONFIG_PROVE_RCU */ + +#define __do_rcu_dereference_check(c) do { } while (0) + +#endif /* #else #ifdef CONFIG_PROVE_RCU */ + +/* + * Helper functions for rcu_dereference_check(), rcu_dereference_protected() + * and rcu_assign_pointer().
Some of these could be folded into their + * callers, but they are left separate in order to ease introduction of + * multiple flavors of pointers to match the multiple flavors of RCU + * (e.g., __rcu_bh, __rcu_sched, and __srcu), should this make sense in + * the future. + */ +#define __rcu_access_pointer(p, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + (void) (((typeof (*p) space *)p) == p); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_check(p, c, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + __do_rcu_dereference_check(c); \ + (void) (((typeof (*p) space *)p) == p); \ + smp_read_barrier_depends(); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_protected(p, c, space) \ + ({ \ + __do_rcu_dereference_check(c); \ + (void) (((typeof (*p) space *)p) == p); \ + ((typeof(*p) __force __kernel *)(p)); \ + }) + +#define __rcu_dereference_index_check(p, c) \ + ({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + __do_rcu_dereference_check(c); \ + smp_read_barrier_depends(); \ + (_________p1); \ + }) +#define __rcu_assign_pointer(p, v, space) \ + ({ \ + if (!__builtin_constant_p(v) || \ + ((v) != NULL)) \ + smp_wmb(); \ + (p) = (typeof(*v) __force space *)(v); \ + }) + + +/** + * rcu_access_pointer() - fetch RCU pointer with no dereferencing + * @p: The pointer to read + * + * Return the value of the specified RCU-protected pointer, but omit the + * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * when the value of this pointer is accessed, but the pointer is not + * dereferenced, for example, when testing an RCU-protected pointer against + * NULL. Although rcu_access_pointer() may also be used in cases where + * update-side locks prevent the value of the pointer from changing, you + * should instead use rcu_dereference_protected() for this use case.
+ */ +#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) + /** - * rcu_dereference_check - rcu_dereference with debug checking + * rcu_dereference_check() - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Do an rcu_dereference(), but check that the conditions under which the - * dereference will take place are correct. Typically the conditions indicate - * the various locking conditions that should be held at that point. The check - * should return true if the conditions are satisfied. + * dereference will take place are correct. Typically the conditions + * indicate the various locking conditions that should be held at that + * point. The check should return true if the conditions are satisfied. + * An implicit check for being in an RCU read-side critical section + * (rcu_read_lock()) is included. * * For example: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock)); + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); * * could be used to indicate to lockdep that foo->bar may only be dereferenced - * if either the RCU read lock is held, or that the lock required to replace + * if either rcu_read_lock() is held, or that the lock required to replace * the bar struct at foo->bar is held. 
* * Note that the list of conditions may also include indications of when a lock * need not be held, for example during initialisation or destruction of the * target struct: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock) || + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || * atomic_read(&foo->usage) == 0); + * + * Inserts memory barriers on architectures that require them + * (currently only the Alpha), prevents the compiler from refetching + * (and from merging fetches), and, more importantly, documents exactly + * which pointers are protected by RCU and checks that the pointer is + * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - ({ \ - if (debug_lockdep_rcu_enabled() && !(c)) \ - lockdep_rcu_dereference(__FILE__, __LINE__); \ - rcu_dereference_raw(p); \ - }) + __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) /** - * rcu_dereference_protected - fetch RCU pointer when updates prevented + * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_bh_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) + +/** + * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_sched_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ + __rcu) + +#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? 
@@@*/ + +/** + * rcu_dereference_index_check() - rcu_dereference for indices with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * Similar to rcu_dereference_check(), but omits the sparse checking. + * This allows rcu_dereference_index_check() to be used on integers, + * which can then be used as array indices. Attempting to use + * rcu_dereference_check() on an integer will give compiler warnings + * because the sparse address-space mechanism relies on dereferencing + * the RCU-protected pointer. Dereferencing integers is not something + * that even gcc will put up with. + * + * Note that this function does not implicitly check for RCU read-side + * critical sections. If this function gains lots of uses, it might + * make sense to provide versions for each flavor of RCU, but it does + * not make sense as of early 2010. + */ +#define rcu_dereference_index_check(p, c) \ + __rcu_dereference_index_check((p), (c)) + +/** + * rcu_dereference_protected() - fetch RCU pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This @@ -236,36 +375,61 @@ extern int rcu_my_thread_group_empty(void); * prevent the compiler from repeating this reference or combining it * with other references, so it should not be used without protection * of appropriate locks. + * + * This function is only for update-side use. Using this function + * when protected only by rcu_read_lock() will result in infrequent + * but very ugly failures. 
*/ #define rcu_dereference_protected(p, c) \ - ({ \ - if (debug_lockdep_rcu_enabled() && !(c)) \ - lockdep_rcu_dereference(__FILE__, __LINE__); \ - (p); \ - }) + __rcu_dereference_protected((p), (c), __rcu) -#else /* #ifdef CONFIG_PROVE_RCU */ +/** + * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_bh_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#define rcu_dereference_check(p, c) rcu_dereference_raw(p) -#define rcu_dereference_protected(p, c) (p) +/** + * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_sched_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#endif /* #else #ifdef CONFIG_PROVE_RCU */ /** - * rcu_access_pointer - fetch RCU pointer with no dereferencing + * rcu_dereference() - fetch RCU-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful - * when the value of this pointer is accessed, but the pointer is not - * dereferenced, for example, when testing an RCU-protected pointer against - * NULL. This may also be used in cases where update-side locks prevent - * the value of the pointer from changing, but rcu_dereference_protected() - * is a lighter-weight primitive for this use case. + * This is a simple wrapper around rcu_dereference_check(). 
*/ -#define rcu_access_pointer(p) ACCESS_ONCE(p) +#define rcu_dereference(p) rcu_dereference_check(p, 0) /** - * rcu_read_lock - mark the beginning of an RCU read-side critical section. + * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) + +/** + * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) + +/** + * rcu_read_lock() - mark the beginning of an RCU read-side critical section * * When synchronize_rcu() is invoked on one CPU while other CPUs * are within RCU read-side critical sections, then the @@ -311,7 +475,7 @@ static inline void rcu_read_lock(void) */ /** - * rcu_read_unlock - marks the end of an RCU read-side critical section. + * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * See rcu_read_lock() for more information. */ @@ -323,15 +487,16 @@ static inline void rcu_read_unlock(void) } /** - * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section + * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section * * This is equivalent of rcu_read_lock(), but to be used when updates - * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks - * consider completion of a softirq handler to be a quiescent state, - * a process in RCU read-side critical section must be protected by - * disabling softirqs. Read-side critical sections in interrupt context - * can use just rcu_read_lock(). - * + * are being done using call_rcu_bh() or synchronize_rcu_bh(). 
Since + * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a + * softirq handler to be a quiescent state, a process in RCU read-side + * critical section must be protected by disabling softirqs. Read-side + * critical sections in interrupt context can use just rcu_read_lock(), + * though this should at least be commented to avoid confusing people + * reading the code. */ static inline void rcu_read_lock_bh(void) { @@ -353,13 +518,12 @@ static inline void rcu_read_unlock_bh(void) } /** - * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section + * rcu_read_lock_sched() - mark the beginning of an RCU-sched critical section * - * Should be used with either - * - synchronize_sched() - * or - * - call_rcu_sched() and rcu_barrier_sched() - * on the write-side to insure proper synchronization. + * This is equivalent of rcu_read_lock(), but to be used when updates + * are being done using call_rcu_sched() or synchronize_sched(). + * Read-side critical sections can also be introduced by anything that + * disables preemption, including local_irq_disable() and friends. */ static inline void rcu_read_lock_sched(void) { @@ -394,54 +558,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) preempt_enable_notrace(); } - /** - * rcu_dereference_raw - fetch an RCU-protected pointer + * rcu_assign_pointer() - assign to RCU-protected pointer + * @p: pointer to assign to + * @v: value to assign (publish) * - * The caller must be within some flavor of RCU read-side critical - * section, or must be otherwise preventing the pointer from changing, - * for example, by holding an appropriate lock. This pointer may later - * be safely dereferenced. It is the caller's responsibility to have - * done the right thing, as this primitive does no checking of any kind.
- * - * Inserts memory barriers on architectures that require them - * (currently only the Alpha), and, more importantly, documents - * exactly which pointers are protected by RCU. - */ -#define rcu_dereference_raw(p) ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference - fetch an RCU-protected pointer, checking for RCU - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference(p) \ - rcu_dereference_check(p, rcu_read_lock_held()) - -/** - * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_bh(p) \ - rcu_dereference_check(p, rcu_read_lock_bh_held()) - -/** - * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_sched(p) \ - rcu_dereference_check(p, rcu_read_lock_sched_held()) - -/** - * rcu_assign_pointer - assign (publicize) a pointer to a newly - * initialized structure that will be dereferenced by RCU read-side - * critical sections. Returns the value assigned. + * Assigns the specified value to the specified RCU-protected + * pointer, ensuring that any concurrent RCU readers will see + * any prior initialization. Returns the value assigned. * * Inserts memory barriers on architectures that require them * (pretty much all of them other than x86), and also prevents @@ -450,14 +574,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * call documents which pointers will be dereferenced by RCU read-side * code. 
*/ - #define rcu_assign_pointer(p, v) \ - ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - smp_wmb(); \ - (p) = (v); \ - }) + __rcu_assign_pointer((p), (v), __rcu) + +/** + * RCU_INIT_POINTER() - initialize an RCU protected pointer + * + * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep + * splats. + */ +#define RCU_INIT_POINTER(p, v) \ + p = (typeof(*v) __force __rcu *)(v) /* Infrastructure to implement the synchronize_() primitives. */ @@ -469,7 +596,7 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); /** - * call_rcu - Queue an RCU callback for invocation after a grace period. + * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. * @func: actual update function to be invoked after the grace period * @@ -483,7 +610,7 @@ extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); /** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. * @func: actual update function to be invoked after the grace period * @@ -503,4 +630,41 @@ extern void call_rcu(struct rcu_head *head, extern void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *head)); +/* + * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally + * by call_rcu() and rcu callback execution, and are therefore not part of the + * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. 
+ */ + +#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD +# define STATE_RCU_HEAD_READY 0 +# define STATE_RCU_HEAD_QUEUED 1 + +extern struct debug_obj_descr rcuhead_debug_descr; + +static inline void debug_rcu_head_queue(struct rcu_head *head) +{ + debug_object_activate(head, &rcuhead_debug_descr); + debug_object_active_state(head, &rcuhead_debug_descr, + STATE_RCU_HEAD_READY, + STATE_RCU_HEAD_QUEUED); +} + +static inline void debug_rcu_head_unqueue(struct rcu_head *head) +{ + debug_object_active_state(head, &rcuhead_debug_descr, + STATE_RCU_HEAD_QUEUED, + STATE_RCU_HEAD_READY); + debug_object_deactivate(head, &rcuhead_debug_descr); +} +#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ +static inline void debug_rcu_head_queue(struct rcu_head *head) +{ +} + +static inline void debug_rcu_head_unqueue(struct rcu_head *head) +{ +} +#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ + #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index a519587..14e5a76 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -29,6 +29,10 @@ void rcu_sched_qs(int cpu); void rcu_bh_qs(int cpu); +static inline void rcu_note_context_switch(int cpu) +{ + rcu_sched_qs(cpu); +} #define __rcu_read_lock() preempt_disable() #define __rcu_read_unlock() preempt_enable() @@ -74,7 +78,17 @@ static inline void rcu_sched_force_quiescent_state(void) { } -#define synchronize_rcu synchronize_sched +extern void synchronize_sched(void); + +static inline void synchronize_rcu(void) +{ + synchronize_sched(); +} + +static inline void synchronize_rcu_bh(void) +{ + synchronize_sched(); +} static inline void synchronize_rcu_expedited(void) { @@ -114,4 +128,17 @@ static inline int rcu_preempt_depth(void) return 0; } +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +extern int rcu_scheduler_active __read_mostly; +extern void rcu_scheduler_starting(void); + +#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + +static inline void rcu_scheduler_starting(void) +{ +} + +#endif /* #else #ifdef 
CONFIG_DEBUG_LOCK_ALLOC */ + #endif /* __LINUX_RCUTINY_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 42cc3a0..4828205 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -34,6 +34,7 @@ struct notifier_block; extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); +extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); extern int rcu_expedited_torture_stats(char *page); @@ -86,6 +87,8 @@ static inline void __rcu_read_unlock_bh(void) extern void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); +extern void synchronize_rcu_bh(void); +extern void synchronize_sched(void); extern void synchronize_rcu_expedited(void); static inline void synchronize_rcu_bh_expedited(void) @@ -120,4 +123,7 @@ static inline int rcu_blocking_is_gp(void) return num_online_cpus() == 1; } +extern void rcu_scheduler_starting(void); +extern int rcu_scheduler_active __read_mostly; + #endif /* __LINUX_RCUTREE_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index dad7f66..34d28f7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1301,9 +1301,9 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - const struct cred *real_cred; /* objective and real subjective task + const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ - const struct cred *cred; /* effective (overridable) subjective task + const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations @@ -1429,7 +1429,7 @@ struct task_struct { #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock */ - struct css_set *cgroups; + struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif @@ -1442,7 +1442,7 @@ struct task_struct 
{ struct futex_pi_state *pi_state_cache; #endif #ifdef CONFIG_PERF_EVENTS - struct perf_event_context *perf_event_ctxp; + struct perf_event_context __rcu *perf_event_ctxp; struct mutex perf_event_mutex; struct list_head perf_event_list; #endif diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4d5ecb2..6f456a7 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -27,6 +27,8 @@ #ifndef _LINUX_SRCU_H #define _LINUX_SRCU_H +#include + struct srcu_struct_array { int c[2]; }; @@ -84,8 +86,8 @@ long srcu_batches_completed(struct srcu_struct *sp); /** * srcu_read_lock_held - might we be in SRCU read-side critical section? * - * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in - * an SRCU read-side critical section. In absence of CONFIG_PROVE_LOCKING, + * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU + * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, * this assumes we are in an SRCU read-side critical section unless it can * prove otherwise. */ @@ -106,12 +108,31 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /** - * srcu_dereference - fetch SRCU-protected pointer with checking + * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * @c: condition to check for update-side use + * + * If PROVE_RCU is enabled, invoking this outside of an RCU read-side + * critical section will result in an RCU-lockdep splat, unless @c evaluates + * to 1. The @c argument will normally be a logical expression containing + * lockdep_is_held() calls. 
+ */ +#define srcu_dereference_check(p, sp, c) \ + __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) + +/** + * srcu_dereference - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. * - * Makes rcu_dereference_check() do the dirty work. + * Makes srcu_dereference_check() do the dirty work. If PROVE_RCU + * is enabled, invoking this outside of an SRCU read-side critical + * section will result in an RCU-lockdep splat. */ -#define srcu_dereference(p, sp) \ - rcu_dereference_check(p, srcu_read_lock_held(sp)) +#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) /** * srcu_read_lock - register a new reader for an SRCU-protected structure. diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index d48d4e6..994db5a 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -69,7 +69,7 @@ struct gss_cl_ctx { enum rpc_gss_proc gc_proc; u32 gc_seq; spinlock_t gc_seq_lock; - struct gss_ctx *gc_gss_ctx; + struct gss_ctx __rcu *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; u32 gc_win; unsigned long gc_expiry; @@ -80,7 +80,7 @@ struct gss_upcall_msg; struct gss_cred { struct rpc_cred gc_base; enum rpc_gss_svc gc_service; - struct gss_cl_ctx *gc_ctx; + struct gss_cl_ctx __rcu *gc_ctx; struct gss_upcall_msg *gc_upcall; unsigned char gc_machine_cred : 1; }; diff --git a/include/net/dst.h b/include/net/dst.h index ce078cd..5f839aa 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -94,7 +94,7 @@ struct dst_entry { unsigned long lastuse; union { struct dst_entry *next; - struct rtable *rt_next; + struct rtable __rcu *rt_next; struct rt6_info *rt6_next; struct dn_route *dn_next; }; diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index c49086d..e88dc69 100644 --- a/include/net/fib_rules.h +++ 
b/include/net/fib_rules.h @@ -20,7 +20,7 @@ struct fib_rule { u32 table; u8 action; u32 target; - struct fib_rule * ctarget; + struct fib_rule __rcu *ctarget; char iifname[IFNAMSIZ]; char oifname[IFNAMSIZ]; struct rcu_head rcu; diff --git a/include/net/garp.h b/include/net/garp.h index 825f172..15b30ba 100644 --- a/include/net/garp.h +++ b/include/net/garp.h @@ -107,7 +107,7 @@ struct garp_applicant { }; struct garp_port { - struct garp_applicant *applicants[GARP_APPLICATION_MAX + 1]; + struct garp_applicant __rcu *applicants[GARP_APPLICATION_MAX + 1]; }; extern int garp_register_application(struct garp_application *app); diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 83fd344..b16192f 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -139,7 +139,7 @@ struct inet_sock { mc_all:1; int mc_index; __be32 mc_addr; - struct ip_mc_socklist *mc_list; + struct ip_mc_socklist __rcu *mc_list; struct { unsigned int flags; unsigned int fragsize; diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index fbf9d1c..94fa2cc 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -13,7 +13,7 @@ /* IPv6 tunnel */ struct ip6_tnl { - struct ip6_tnl *next; /* next tunnel in list */ + struct ip6_tnl __rcu *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ struct ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ diff --git a/include/net/ipip.h b/include/net/ipip.h index 11e8513..ea186ab 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -16,7 +16,7 @@ struct ip_tunnel_6rd_parm { }; struct ip_tunnel { - struct ip_tunnel *next; + struct ip_tunnel __rcu *next; struct net_device *dev; int err_count; /* Number of arrived ICMP errors */ @@ -34,12 +34,12 @@ struct ip_tunnel { #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd_parm ip6rd; #endif - struct ip_tunnel_prl_entry *prl; /* potential router list */ + struct 
ip_tunnel_prl_entry __rcu *prl; /* potential router list */ unsigned int prl_count; /* # of entries in PRL */ }; struct ip_tunnel_prl_entry { - struct ip_tunnel_prl_entry *next; + struct ip_tunnel_prl_entry __rcu *next; __be32 addr; u16 flags; struct rcu_head rcu_head; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bd10a79..573d100 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -90,7 +90,7 @@ struct net { #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; #endif - struct net_generic *gen; + struct net_generic __rcu *gen; }; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index bde095f..92229d1 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -75,7 +75,7 @@ struct nf_conntrack_helper; /* nf_conn feature for connections that have a helper */ struct nf_conn_help { /* Helper. if any */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; union nf_conntrack_help help; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 74f119a..9e0915d 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -48,7 +48,7 @@ struct netns_xfrm { struct dst_ops xfrm6_dst_ops; #endif - struct sock *nlsk; + struct sock __rcu *nlsk; struct sock *nlsk_stash; u32 sysctl_aevent_etime; diff --git a/include/net/sock.h b/include/net/sock.h index 1ad6435..a1d5a76 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -148,7 +148,7 @@ struct sock_common { }; struct proto *skc_prot; #ifdef CONFIG_NET_NS - struct net *skc_net; + struct net __rcu *skc_net; #endif }; @@ -293,7 +293,7 @@ struct sock { struct ucred sk_peercred; long sk_rcvtimeo; long sk_sndtimeo; - struct sk_filter *sk_filter; + struct sk_filter __rcu *sk_filter; void *sk_protinfo; struct timer_list sk_timer; ktime_t sk_stamp; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 3a53c77..5cfbc93 100644 --- 
a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -138,7 +138,7 @@ struct css_id { * is called after synchronize_rcu(). But for safe use, css_is_removed() * css_tryget() should be used for avoiding race. */ - struct cgroup_subsys_state *css; + struct cgroup_subsys_state __rcu *css; /* * ID of this css. */ diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 2594e1c..3a756ba 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3801,8 +3801,11 @@ void lockdep_rcu_dereference(const char *file, const int line) { struct task_struct *curr = current; +#ifndef CONFIG_PROVE_RCU_REPEATEDLY if (!debug_locks_off()) return; +#endif /* #ifndef CONFIG_PROVE_RCU_REPEATEDLY */ + /* Note: the following can be executed concurrently, so be careful. */ printk("\n===================================================\n"); printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n"); printk( "---------------------------------------------------\n"); diff --git a/kernel/pid.c b/kernel/pid.c index aebb30d..8bb38ee 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -367,7 +367,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) struct task_struct *result = NULL; if (pid) { struct hlist_node *first; - first = rcu_dereference_check(pid->tasks[type].first, + first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), rcu_read_lock_held() || lockdep_tasklist_lock_is_held()); if (first) diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 49d808e..2d33c7e 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -64,9 +63,6 @@ struct lockdep_map rcu_sched_lock_map = EXPORT_SYMBOL_GPL(rcu_sched_lock_map); #endif -int rcu_scheduler_active __read_mostly; -EXPORT_SYMBOL_GPL(rcu_scheduler_active); - #ifdef CONFIG_DEBUG_LOCK_ALLOC int debug_lockdep_rcu_enabled(void) @@ -77,12 +73,14 @@ int debug_lockdep_rcu_enabled(void) EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); /** - * 
rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? + * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? * * Check for bottom half being disabled, which covers both the * CONFIG_PROVE_RCU and not cases. Note that if someone uses * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) - * will show the situation. + * will show the situation. This is useful for debug checks in functions + * that require that they be called within an RCU read-side critical + * section. * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. */ @@ -97,21 +95,6 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /* - * This function is invoked towards the end of the scheduler's initialization - * process. Before this is called, the idle task might contain - * RCU read-side critical sections (during which time, this idle - * task is booting the system). After this function is called, the - * idle tasks are prohibited from containing RCU read-side critical - * sections. - */ -void rcu_scheduler_starting(void) -{ - WARN_ON(num_online_cpus() != 1); - WARN_ON(nr_context_switches() > 0); - rcu_scheduler_active = 1; -} - -/* * Awaken the corresponding synchronize_rcu() instance now that a * grace period has elapsed. 
*/ @@ -133,3 +116,173 @@ int rcu_my_thread_group_empty(void) } EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty); #endif /* #ifdef CONFIG_PROVE_RCU */ + +#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD +static inline void debug_init_rcu_head(struct rcu_head *head) +{ + debug_object_init(head, &rcuhead_debug_descr); +} + +static inline void debug_rcu_head_free(struct rcu_head *head) +{ + debug_object_free(head, &rcuhead_debug_descr); +} + +/* + * fixup_init is called when: + * - an active object is initialized + */ +static int rcuhead_fixup_init(void *addr, enum debug_obj_state state) +{ + struct rcu_head *head = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + /* + * Ensure that queued callbacks are all executed. + * If we detect that we are nested in a RCU read-side critical + * section, we should simply fail, otherwise we would deadlock. + */ +#ifndef CONFIG_PREEMPT + WARN_ON(1); + return 0; +#else + if (rcu_preempt_depth() != 0 || preempt_count() != 0 || + irqs_disabled()) { + WARN_ON(1); + return 0; + } + rcu_barrier(); + rcu_barrier_sched(); + rcu_barrier_bh(); + debug_object_init(head, &rcuhead_debug_descr); + return 1; +#endif + default: + return 0; + } +} + +/* + * fixup_activate is called when: + * - an active object is activated + * - an unknown object is activated (might be a statically initialized object) + * Activation is performed internally by call_rcu(). + */ +static int rcuhead_fixup_activate(void *addr, enum debug_obj_state state) +{ + struct rcu_head *head = addr; + + switch (state) { + + case ODEBUG_STATE_NOTAVAILABLE: + /* + * This is not really a fixup. We just make sure that it is + * tracked in the object tracker. + */ + debug_object_init(head, &rcuhead_debug_descr); + debug_object_activate(head, &rcuhead_debug_descr); + return 0; + + case ODEBUG_STATE_ACTIVE: + /* + * Ensure that queued callbacks are all executed. + * If we detect that we are nested in a RCU read-side critical + * section, we should simply fail, otherwise we would deadlock. 
+ */ +#ifndef CONFIG_PREEMPT + WARN_ON(1); + return 0; +#else + if (rcu_preempt_depth() != 0 || preempt_count() != 0 || + irqs_disabled()) { + WARN_ON(1); + return 0; + } + rcu_barrier(); + rcu_barrier_sched(); + rcu_barrier_bh(); + debug_object_activate(head, &rcuhead_debug_descr); + return 1; +#endif + default: + return 0; + } +} + +/* + * fixup_free is called when: + * - an active object is freed + */ +static int rcuhead_fixup_free(void *addr, enum debug_obj_state state) +{ + struct rcu_head *head = addr; + + switch (state) { + case ODEBUG_STATE_ACTIVE: + /* + * Ensure that queued callbacks are all executed. + * If we detect that we are nested in a RCU read-side critical + * section, we should simply fail, otherwise we would deadlock. + */ +#ifndef CONFIG_PREEMPT + WARN_ON(1); + return 0; +#else + if (rcu_preempt_depth() != 0 || preempt_count() != 0 || + irqs_disabled()) { + WARN_ON(1); + return 0; + } + rcu_barrier(); + rcu_barrier_sched(); + rcu_barrier_bh(); + debug_object_free(head, &rcuhead_debug_descr); + return 1; +#endif + default: + return 0; + } +} + +/** + * init_rcu_head_on_stack() - initialize on-stack rcu_head for debugobjects + * @head: pointer to rcu_head structure to be initialized + * + * This function informs debugobjects of a new rcu_head structure that + * has been allocated as an auto variable on the stack. This function + * is not required for rcu_head structures that are statically defined or + * that are dynamically allocated on the heap. This function has no + * effect for !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds. + */ +void init_rcu_head_on_stack(struct rcu_head *head) +{ + debug_object_init_on_stack(head, &rcuhead_debug_descr); +} +EXPORT_SYMBOL_GPL(init_rcu_head_on_stack); + +/** + * destroy_rcu_head_on_stack() - destroy on-stack rcu_head for debugobjects + * @head: pointer to rcu_head structure to be destroyed + * + * This function informs debugobjects that an on-stack rcu_head structure + * is about to go out of scope. 
As with init_rcu_head_on_stack(), this + * function is not required for rcu_head structures that are statically + * defined or that are dynamically allocated on the heap. Also as with + * init_rcu_head_on_stack(), this function has no effect for + * !CONFIG_DEBUG_OBJECTS_RCU_HEAD kernel builds. + */ +void destroy_rcu_head_on_stack(struct rcu_head *head) +{ + debug_object_free(head, &rcuhead_debug_descr); +} +EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack); + +struct debug_obj_descr rcuhead_debug_descr = { + .name = "rcu_head", + .fixup_init = rcuhead_fixup_init, + .fixup_activate = rcuhead_fixup_activate, + .fixup_free = rcuhead_fixup_free, +}; +EXPORT_SYMBOL_GPL(rcuhead_debug_descr); +#endif /* #ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD */ diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 9f6d9ff..196ec02 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -44,9 +44,9 @@ struct rcu_ctrlblk { }; /* Definition for rcupdate control block. */ -static struct rcu_ctrlblk rcu_ctrlblk = { - .donetail = &rcu_ctrlblk.rcucblist, - .curtail = &rcu_ctrlblk.rcucblist, +static struct rcu_ctrlblk rcu_sched_ctrlblk = { + .donetail = &rcu_sched_ctrlblk.rcucblist, + .curtail = &rcu_sched_ctrlblk.rcucblist, }; static struct rcu_ctrlblk rcu_bh_ctrlblk = { @@ -54,6 +54,11 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = { .curtail = &rcu_bh_ctrlblk.rcucblist, }; +#ifdef CONFIG_DEBUG_LOCK_ALLOC +int rcu_scheduler_active __read_mostly; +EXPORT_SYMBOL_GPL(rcu_scheduler_active); +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + #ifdef CONFIG_NO_HZ static long rcu_dynticks_nesting = 1; @@ -108,7 +113,8 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) */ void rcu_sched_qs(int cpu) { - if (rcu_qsctr_help(&rcu_ctrlblk) + rcu_qsctr_help(&rcu_bh_ctrlblk)) + if (rcu_qsctr_help(&rcu_sched_ctrlblk) + + rcu_qsctr_help(&rcu_bh_ctrlblk)) raise_softirq(RCU_SOFTIRQ); } @@ -163,6 +169,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) while (list) { next = list->next; prefetch(next); + 
debug_rcu_head_unqueue(list); list->func(list); list = next; } @@ -173,7 +180,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) */ static void rcu_process_callbacks(struct softirq_action *unused) { - __rcu_process_callbacks(&rcu_ctrlblk); + __rcu_process_callbacks(&rcu_sched_ctrlblk); __rcu_process_callbacks(&rcu_bh_ctrlblk); } @@ -187,7 +194,8 @@ static void rcu_process_callbacks(struct softirq_action *unused) * * Cool, huh? (Due to Josh Triplett.) * - * But we want to make this a static inline later. + * But we want to make this a static inline later. The cond_resched() + * currently makes this problematic. */ void synchronize_sched(void) { @@ -195,12 +203,6 @@ void synchronize_sched(void) } EXPORT_SYMBOL_GPL(synchronize_sched); -void synchronize_rcu_bh(void) -{ - synchronize_sched(); -} -EXPORT_SYMBOL_GPL(synchronize_rcu_bh); - /* * Helper function for call_rcu() and call_rcu_bh(). */ @@ -210,6 +212,7 @@ static void __call_rcu(struct rcu_head *head, { unsigned long flags; + debug_rcu_head_queue(head); head->func = func; head->next = NULL; @@ -226,7 +229,7 @@ static void __call_rcu(struct rcu_head *head, */ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { - __call_rcu(head, func, &rcu_ctrlblk); + __call_rcu(head, func, &rcu_sched_ctrlblk); } EXPORT_SYMBOL_GPL(call_rcu); @@ -244,11 +247,13 @@ void rcu_barrier(void) { struct rcu_synchronize rcu; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu(&rcu.head, wakeme_after_rcu); /* Wait for it. */ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(rcu_barrier); @@ -256,11 +261,13 @@ void rcu_barrier_bh(void) { struct rcu_synchronize rcu; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_bh(&rcu.head, wakeme_after_rcu); /* Wait for it. 
*/ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(rcu_barrier_bh); @@ -268,11 +275,13 @@ void rcu_barrier_sched(void) { struct rcu_synchronize rcu; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_sched(&rcu.head, wakeme_after_rcu); /* Wait for it. */ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(rcu_barrier_sched); @@ -280,3 +289,5 @@ void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } + +#include "rcutiny_plugin.h" diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h new file mode 100644 index 0000000..d223a92 --- /dev/null +++ b/kernel/rcutiny_plugin.h @@ -0,0 +1,39 @@ +/* + * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * Internal non-public definitions that provide either classic + * or preemptable semantics. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright IBM Corporation, 2009 + * + * Author: Paul E. McKenney + */ + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + +#include + +/* + * During boot, we forgive RCU lockdep issues. After this function is + * invoked, we start taking RCU lockdep issues seriously. 
+ */ +void rcu_scheduler_starting(void) +{ + WARN_ON(nr_context_switches() > 0); + rcu_scheduler_active = 1; +} + +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 58df55b..077defb 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -464,9 +464,11 @@ static void rcu_bh_torture_synchronize(void) { struct rcu_bh_torture_synchronize rcu; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb); wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } static struct rcu_torture_ops rcu_bh_ops = { diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 3ec8160..d5bc439 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "rcutree.h" @@ -53,8 +54,8 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; -#define RCU_STATE_INITIALIZER(name) { \ - .level = { &name.node[0] }, \ +#define RCU_STATE_INITIALIZER(structname) { \ + .level = { &structname.node[0] }, \ .levelcnt = { \ NUM_RCU_LVL_0, /* root of hierarchy. 
*/ \ NUM_RCU_LVL_1, \ @@ -65,13 +66,14 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; .signaled = RCU_GP_IDLE, \ .gpnum = -300, \ .completed = -300, \ - .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&name.onofflock), \ + .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ .orphan_cbs_list = NULL, \ - .orphan_cbs_tail = &name.orphan_cbs_list, \ + .orphan_cbs_tail = &structname.orphan_cbs_list, \ .orphan_qlen = 0, \ - .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&name.fqslock), \ + .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ .n_force_qs = 0, \ .n_force_qs_ngp = 0, \ + .name = #structname, \ } struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); @@ -80,6 +82,9 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); +int rcu_scheduler_active __read_mostly; +EXPORT_SYMBOL_GPL(rcu_scheduler_active); + /* * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s * permit this function to be invoked without holding the root rcu_node @@ -97,25 +102,32 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) */ void rcu_sched_qs(int cpu) { - struct rcu_data *rdp; + struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); - rdp = &per_cpu(rcu_sched_data, cpu); rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; - rcu_preempt_note_context_switch(cpu); } void rcu_bh_qs(int cpu) { - struct rcu_data *rdp; + struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - rdp = &per_cpu(rcu_bh_data, cpu); rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; } +/* + * Note a context switch. This is a quiescent state for RCU-sched, + * and requires special handling for preemptible RCU. 
+ */ +void rcu_note_context_switch(int cpu) +{ + rcu_sched_qs(cpu); + rcu_preempt_note_context_switch(cpu); +} + #ifdef CONFIG_NO_HZ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = 1, @@ -438,6 +450,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) #ifdef CONFIG_RCU_CPU_STALL_DETECTOR +int rcu_cpu_stall_panicking __read_mostly; + static void record_gp_stall_check_time(struct rcu_state *rsp) { rsp->gp_start = jiffies; @@ -470,7 +484,8 @@ static void print_other_cpu_stall(struct rcu_state *rsp) /* OK, time to rat on our buddy... */ - printk(KERN_ERR "INFO: RCU detected CPU stalls:"); + printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", + rsp->name); rcu_for_each_leaf_node(rsp, rnp) { raw_spin_lock_irqsave(&rnp->lock, flags); rcu_print_task_stall(rnp); @@ -481,7 +496,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp) if (rnp->qsmask & (1UL << cpu)) printk(" %d", rnp->grplo + cpu); } - printk(" (detected by %d, t=%ld jiffies)\n", + printk("} (detected by %d, t=%ld jiffies)\n", smp_processor_id(), (long)(jiffies - rsp->gp_start)); trigger_all_cpu_backtrace(); @@ -497,8 +512,8 @@ static void print_cpu_stall(struct rcu_state *rsp) unsigned long flags; struct rcu_node *rnp = rcu_get_root(rsp); - printk(KERN_ERR "INFO: RCU detected CPU %d stall (t=%lu jiffies)\n", - smp_processor_id(), jiffies - rsp->gp_start); + printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", + rsp->name, smp_processor_id(), jiffies - rsp->gp_start); trigger_all_cpu_backtrace(); raw_spin_lock_irqsave(&rnp->lock, flags); @@ -515,6 +530,8 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) long delta; struct rcu_node *rnp; + if (rcu_cpu_stall_panicking) + return; delta = jiffies - rsp->jiffies_stall; rnp = rdp->mynode; if ((rnp->qsmask & rdp->grpmask) && delta >= 0) { @@ -529,6 +546,21 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) } } +static int rcu_panic(struct 
notifier_block *this, unsigned long ev, void *ptr) +{ + rcu_cpu_stall_panicking = 1; + return NOTIFY_DONE; +} + +static struct notifier_block rcu_panic_block = { + .notifier_call = rcu_panic, +}; + +static void __init check_cpu_stall_init(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, &rcu_panic_block); +} + #else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ static void record_gp_stall_check_time(struct rcu_state *rsp) @@ -539,6 +571,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) { } +static void __init check_cpu_stall_init(void) +{ +} + #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* @@ -1076,6 +1112,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) while (list) { next = list->next; prefetch(next); + debug_rcu_head_unqueue(list); list->func(list); list = next; if (++count >= rdp->blimit) @@ -1125,8 +1162,6 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) */ void rcu_check_callbacks(int cpu, int user) { - if (!rcu_pending(cpu)) - return; /* if nothing for RCU to do. */ if (user || (idle_cpu(cpu) && rcu_scheduler_active && !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { @@ -1158,7 +1193,8 @@ void rcu_check_callbacks(int cpu, int user) rcu_bh_qs(cpu); } rcu_preempt_check_callbacks(cpu); - raise_softirq(RCU_SOFTIRQ); + if (rcu_pending(cpu)) + raise_softirq(RCU_SOFTIRQ); } #ifdef CONFIG_SMP @@ -1236,11 +1272,11 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed) break; /* grace period idle or initializing, ignore. */ case RCU_SAVE_DYNTICK: - - raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) break; /* So gcc recognizes the dead code. */ + raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ + /* Record dyntick-idle state. 
*/ force_qs_rnp(rsp, dyntick_save_progress_counter); raw_spin_lock(&rnp->lock); /* irqs already disabled */ @@ -1353,6 +1389,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), unsigned long flags; struct rcu_data *rdp; + debug_rcu_head_queue(head); head->func = func; head->next = NULL; @@ -1449,11 +1486,13 @@ void synchronize_sched(void) if (rcu_blocking_is_gp()) return; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_sched(&rcu.head, wakeme_after_rcu); /* Wait for it. */ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(synchronize_sched); @@ -1473,11 +1512,13 @@ void synchronize_rcu_bh(void) if (rcu_blocking_is_gp()) return; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_bh(&rcu.head, wakeme_after_rcu); /* Wait for it. */ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(synchronize_rcu_bh); @@ -1498,8 +1539,20 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) check_cpu_stall(rsp, rdp); /* Is the RCU core waiting for a quiescent state from this CPU? */ - if (rdp->qs_pending) { + if (rdp->qs_pending && !rdp->passed_quiesc) { + + /* + * If force_quiescent_state() coming soon and this CPU + * needs a quiescent state, and this is either RCU-sched + * or RCU-bh, force a local reschedule. + */ rdp->n_rp_qs_pending++; + if (!rdp->preemptable && + ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, + jiffies)) + set_need_resched(); + } else if (rdp->qs_pending && rdp->passed_quiesc) { + rdp->n_rp_report_qs++; return 1; } @@ -1767,6 +1820,21 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, } /* + * This function is invoked towards the end of the scheduler's initialization + * process. 
Before this is called, the idle task might contain + * RCU read-side critical sections (during which time, this idle + * task is booting the system). After this function is called, the + * idle tasks are prohibited from containing RCU read-side critical + * sections. This function also enables RCU lockdep checking. + */ +void rcu_scheduler_starting(void) +{ + WARN_ON(num_online_cpus() != 1); + WARN_ON(nr_context_switches() > 0); + rcu_scheduler_active = 1; +} + +/* * Compute the per-level fanout, either using the exact fanout specified * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. */ @@ -1849,6 +1917,14 @@ static void __init rcu_init_one(struct rcu_state *rsp) INIT_LIST_HEAD(&rnp->blocked_tasks[3]); } } + + rnp = rsp->level[NUM_RCU_LVLS - 1]; + for_each_possible_cpu(i) { + while (i > rnp->grphi) + rnp++; + rsp->rda[i]->mynode = rnp; + rcu_boot_init_percpu_data(i, rsp); + } } /* @@ -1859,19 +1935,11 @@ static void __init rcu_init_one(struct rcu_state *rsp) #define RCU_INIT_FLAVOR(rsp, rcu_data) \ do { \ int i; \ - int j; \ - struct rcu_node *rnp; \ \ - rcu_init_one(rsp); \ - rnp = (rsp)->level[NUM_RCU_LVLS - 1]; \ - j = 0; \ for_each_possible_cpu(i) { \ - if (i > rnp[j].grphi) \ - j++; \ - per_cpu(rcu_data, i).mynode = &rnp[j]; \ (rsp)->rda[i] = &per_cpu(rcu_data, i); \ - rcu_boot_init_percpu_data(i, rsp); \ } \ + rcu_init_one(rsp); \ } while (0) void __init rcu_init(void) @@ -1879,12 +1947,6 @@ void __init rcu_init(void) int cpu; rcu_bootup_announce(); -#ifdef CONFIG_RCU_CPU_STALL_DETECTOR - printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ -#if NUM_RCU_LVL_4 != 0 - printk(KERN_INFO "Experimental four-level hierarchy is enabled.\n"); -#endif /* #if NUM_RCU_LVL_4 != 0 */ RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); __rcu_init_preempt(); @@ -1898,6 +1960,7 @@ void __init rcu_init(void) cpu_notifier(rcu_cpu_notify, 0); 
for_each_online_cpu(cpu) rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); + check_cpu_stall_init(); } #include "rcutree_plugin.h" diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 4a525a3..14c040b 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -223,6 +223,7 @@ struct rcu_data { /* 5) __rcu_pending() statistics. */ unsigned long n_rcu_pending; /* rcu_pending() calls since boot. */ unsigned long n_rp_qs_pending; + unsigned long n_rp_report_qs; unsigned long n_rp_cb_ready; unsigned long n_rp_cpu_needs_gp; unsigned long n_rp_gp_completed; @@ -326,6 +327,7 @@ struct rcu_state { unsigned long jiffies_stall; /* Time at which to check */ /* for CPU stalls. */ #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + char *name; /* Name of structure. */ }; /* Return values for rcu_preempt_offline_tasks(). */ diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 79b53bd..0e4f420 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -26,6 +26,45 @@ #include +/* + * Check the RCU kernel configuration parameters and print informative + * messages about anything out of the ordinary. If you like #ifdef, you + * will love this function. 
+ */ +static void __init rcu_bootup_announce_oddness(void) +{ +#ifdef CONFIG_RCU_TRACE + printk(KERN_INFO "\tRCU debugfs-based tracing is enabled.\n"); +#endif +#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) + printk(KERN_INFO "\tCONFIG_RCU_FANOUT set to non-default value of %d\n", + CONFIG_RCU_FANOUT); +#endif +#ifdef CONFIG_RCU_FANOUT_EXACT + printk(KERN_INFO "\tHierarchical RCU autobalancing is disabled.\n"); +#endif +#ifdef CONFIG_RCU_FAST_NO_HZ + printk(KERN_INFO + "\tRCU dyntick-idle grace-period acceleration is enabled.\n"); +#endif +#ifdef CONFIG_PROVE_RCU + printk(KERN_INFO "\tRCU lockdep checking is enabled.\n"); +#endif +#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE + printk(KERN_INFO "\tRCU torture testing starts during boot.\n"); +#endif +#ifndef CONFIG_RCU_CPU_STALL_DETECTOR + printk(KERN_INFO + "\tRCU-based detection of stalled CPUs is disabled.\n"); +#endif +#ifndef CONFIG_RCU_CPU_STALL_VERBOSE + printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n"); +#endif +#if NUM_RCU_LVL_4 != 0 + printk(KERN_INFO "\tExperimental four-level hierarchy is enabled.\n"); +#endif +} + #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); @@ -38,8 +77,8 @@ static int rcu_preempted_readers_exp(struct rcu_node *rnp); */ static void __init rcu_bootup_announce(void) { - printk(KERN_INFO - "Experimental preemptable hierarchical RCU implementation.\n"); + printk(KERN_INFO "Preemptable hierarchical RCU implementation.\n"); + rcu_bootup_announce_oddness(); } /* @@ -75,13 +114,19 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); * that this just means that the task currently running on the CPU is * not in a quiescent state. There might be any number of tasks blocked * while in an RCU read-side critical section. 
+ * + * Unlike the other rcu_*_qs() functions, callers to this function + * must disable irqs in order to protect the assignment to + * ->rcu_read_unlock_special. */ static void rcu_preempt_qs(int cpu) { struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); + rdp->passed_quiesc_completed = rdp->gpnum - 1; barrier(); rdp->passed_quiesc = 1; + current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; } /* @@ -144,9 +189,8 @@ static void rcu_preempt_note_context_switch(int cpu) * grace period, then the fact that the task has been enqueued * means that we continue to block the current grace period. */ - rcu_preempt_qs(cpu); local_irq_save(flags); - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + rcu_preempt_qs(cpu); local_irq_restore(flags); } @@ -236,7 +280,6 @@ static void rcu_read_unlock_special(struct task_struct *t) */ special = t->rcu_read_unlock_special; if (special & RCU_READ_UNLOCK_NEED_QS) { - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; rcu_preempt_qs(smp_processor_id()); } @@ -473,7 +516,6 @@ static void rcu_preempt_check_callbacks(int cpu) struct task_struct *t = current; if (t->rcu_read_lock_nesting == 0) { - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; rcu_preempt_qs(cpu); return; } @@ -515,11 +557,13 @@ void synchronize_rcu(void) if (!rcu_scheduler_active) return; + init_rcu_head_on_stack(&rcu.head); init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu(&rcu.head, wakeme_after_rcu); /* Wait for it. 
*/ wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); } EXPORT_SYMBOL_GPL(synchronize_rcu); @@ -754,6 +798,7 @@ void exit_rcu(void) static void __init rcu_bootup_announce(void) { printk(KERN_INFO "Hierarchical RCU implementation.\n"); + rcu_bootup_announce_oddness(); } /* @@ -1008,6 +1053,8 @@ static DEFINE_PER_CPU(unsigned long, rcu_dyntick_holdoff); int rcu_needs_cpu(int cpu) { int c = 0; + int snap; + int snap_nmi; int thatcpu; /* Check for being in the holdoff period. */ @@ -1015,12 +1062,18 @@ int rcu_needs_cpu(int cpu) return rcu_needs_cpu_quick_check(cpu); /* Don't bother unless we are the last non-dyntick-idle CPU. */ - for_each_cpu_not(thatcpu, nohz_cpu_mask) - if (thatcpu != cpu) { + for_each_online_cpu(thatcpu) { + if (thatcpu == cpu) + continue; + snap = per_cpu(rcu_dynticks, thatcpu).dynticks; + snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi; + smp_mb(); /* Order sampling of snap with end of grace period. */ + if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) { per_cpu(rcu_dyntick_drain, cpu) = 0; per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; return rcu_needs_cpu_quick_check(cpu); } + } /* Check and update the rcu_dyntick_drain sequencing. */ if (per_cpu(rcu_dyntick_drain, cpu) <= 0) { diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index d45db2e..36c95b4 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -241,11 +241,13 @@ static const struct file_operations rcugp_fops = { static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp) { seq_printf(m, "%3d%cnp=%ld " - "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n", + "qsp=%ld rpq=%ld cbr=%ld cng=%ld " + "gpc=%ld gps=%ld nf=%ld nn=%ld\n", rdp->cpu, cpu_is_offline(rdp->cpu) ? '!' 
: ' ', rdp->n_rcu_pending, rdp->n_rp_qs_pending, + rdp->n_rp_report_qs, rdp->n_rp_cb_ready, rdp->n_rp_cpu_needs_gp, rdp->n_rp_gp_completed, diff --git a/kernel/sched.c b/kernel/sched.c index 3c2a54f..53e1670 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -539,7 +539,7 @@ struct rq { #ifdef CONFIG_SMP struct root_domain *rd; - struct sched_domain *sd; + struct sched_domain __rcu *sd; unsigned char idle_at_tick; /* For active balancing */ @@ -3706,7 +3706,7 @@ need_resched: preempt_disable(); cpu = smp_processor_id(); rq = cpu_rq(cpu); - rcu_sched_qs(cpu); + rcu_note_context_switch(cpu); prev = rq->curr; switch_count = &prev->nivcsw; diff --git a/kernel/softirq.c b/kernel/softirq.c index 7c1a67e..0db913a 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -716,7 +716,7 @@ static int run_ksoftirqd(void * __bind_cpu) preempt_enable_no_resched(); cond_resched(); preempt_disable(); - rcu_sched_qs((long)__bind_cpu); + rcu_note_context_switch((long)__bind_cpu); } preempt_enable(); set_current_state(TASK_INTERRUPTIBLE); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 935248b..7698752 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -307,6 +307,12 @@ config DEBUG_OBJECTS_WORK work queue routines to track the life time of work objects and validate the work operations. +config DEBUG_OBJECTS_RCU_HEAD + bool "Debug RCU callbacks objects" + depends on DEBUG_OBJECTS + help + Enable this to turn on debugging of RCU list heads (call_rcu() usage). + config DEBUG_OBJECTS_ENABLE_DEFAULT int "debug_objects bootup default value (0-1)" range 0 1 @@ -512,6 +518,31 @@ config PROVE_RCU Say N if you are unsure. +config PROVE_RCU_REPEATEDLY + bool "RCU debugging: don't disable PROVE_RCU on first splat" + depends on PROVE_RCU + default n + help + By itself, PROVE_RCU will disable checking upon issuing the + first warning (or "splat"). This feature prevents such + disabling, allowing multiple RCU-lockdep warnings to be printed + on a single reboot. 
+ +config SPARSE_RCU_POINTER + bool "RCU debugging: sparse-based checks for pointer usage" + default n + help + This feature enables the __rcu sparse annotation for + RCU-protected pointers. This annotation will cause sparse + to flag any non-RCU use of annotated pointers. This can be + helpful when debugging RCU usage. Please note that this feature + is not intended to enforce code cleanliness; it is instead merely + a debugging aid. + + Say Y to make sparse flag questionable use of RCU-protected pointers + + Say N if you are unsure. + config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT @@ -793,7 +824,7 @@ config RCU_CPU_STALL_DETECTOR config RCU_CPU_STALL_VERBOSE bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU - default n + default y help This option causes RCU to printk detailed per-task information for any tasks that are stalling the current RCU grace period. 
diff --git a/lib/debugobjects.c b/lib/debugobjects.c index b862b30..076464f 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -141,6 +141,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) obj->object = addr; obj->descr = descr; obj->state = ODEBUG_STATE_NONE; + obj->astate = 0; hlist_del(&obj->node); hlist_add_head(&obj->node, &b->list); @@ -252,8 +253,10 @@ static void debug_print_object(struct debug_obj *obj, char *msg) if (limit < 5 && obj->descr != descr_test) { limit++; - WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg, - obj_states[obj->state], obj->descr->name); + WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) " + "object type: %s\n", + msg, obj_states[obj->state], obj->astate, + obj->descr->name); } debug_objects_warnings++; } @@ -447,7 +450,10 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: case ODEBUG_STATE_ACTIVE: - obj->state = ODEBUG_STATE_INACTIVE; + if (!obj->astate) + obj->state = ODEBUG_STATE_INACTIVE; + else + debug_print_object(obj, "deactivate"); break; case ODEBUG_STATE_DESTROYED: @@ -553,6 +559,53 @@ out_unlock: raw_spin_unlock_irqrestore(&db->lock, flags); } +/** + * debug_object_active_state - debug checks object usage state machine + * @addr: address of the object + * @descr: pointer to an object specific debug description structure + * @expect: expected state + * @next: state to move to if expected state is found + */ +void +debug_object_active_state(void *addr, struct debug_obj_descr *descr, + unsigned int expect, unsigned int next) +{ + struct debug_bucket *db; + struct debug_obj *obj; + unsigned long flags; + + if (!debug_objects_enabled) + return; + + db = get_bucket((unsigned long) addr); + + raw_spin_lock_irqsave(&db->lock, flags); + + obj = lookup_object(addr, db); + if (obj) { + switch (obj->state) { + case ODEBUG_STATE_ACTIVE: + if (obj->astate == expect) + obj->astate = next; + else + 
debug_print_object(obj, "active_state"); + break; + + default: + debug_print_object(obj, "active_state"); + break; + } + } else { + struct debug_obj o = { .object = addr, + .state = ODEBUG_STATE_NOTAVAILABLE, + .descr = descr }; + + debug_print_object(&o, "active_state"); + } + + raw_spin_unlock_irqrestore(&db->lock, flags); +} + #ifdef CONFIG_DEBUG_OBJECTS_FREE static void __debug_check_no_obj_freed(const void *address, unsigned long size) { diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 2a087e0..08f86cc 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -49,7 +49,7 @@ struct radix_tree_node { unsigned int height; /* Height from the bottom */ unsigned int count; struct rcu_head rcu_head; - void *slots[RADIX_TREE_MAP_SIZE]; + void __rcu *slots[RADIX_TREE_MAP_SIZE]; unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 707d0dc..f03d8d6 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -663,7 +663,6 @@ int bdi_init(struct backing_dev_info *bdi) bdi->max_ratio = 100; bdi->max_prop_frac = PROP_FRAC_BASE; spin_lock_init(&bdi->wb_lock); - INIT_RCU_HEAD(&bdi->rcu_head); INIT_LIST_HEAD(&bdi->bdi_list); INIT_LIST_HEAD(&bdi->wb_list); INIT_LIST_HEAD(&bdi->work_list); diff --git a/mm/slob.c b/mm/slob.c index 837ebd6..6de238d 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -647,7 +647,6 @@ void kmem_cache_free(struct kmem_cache *c, void *b) if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) { struct slob_rcu *slob_rcu; slob_rcu = b + (c->size - sizeof(struct slob_rcu)); - INIT_RCU_HEAD(&slob_rcu->head); slob_rcu->size = c->size; call_rcu(&slob_rcu->head, kmem_rcu_free); } else { diff --git a/net/802/stp.c b/net/802/stp.c index 53c8f77..978c30b 100644 --- a/net/802/stp.c +++ b/net/802/stp.c @@ -21,8 +21,8 @@ #define GARP_ADDR_MAX 0x2F #define GARP_ADDR_RANGE (GARP_ADDR_MAX - GARP_ADDR_MIN) -static const struct stp_proto *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly; -static const struct stp_proto 
*stp_proto __read_mostly; +static const struct stp_proto __rcu *garp_protos[GARP_ADDR_RANGE + 1] __read_mostly; +static const struct stp_proto __rcu *stp_proto __read_mostly; static struct llc_sap *sap __read_mostly; static unsigned int sap_registered; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9101a4e..3f66cd1 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -246,7 +246,7 @@ int br_fdb_test_addr(struct net_device *dev, unsigned char *addr) return 0; rcu_read_lock(); - fdb = __br_fdb_get(dev->br_port->br, addr); + fdb = __br_fdb_get(br_port(dev)->br, addr); ret = fdb && fdb->dst->dev != dev && fdb->dst->state == BR_STATE_FORWARDING; rcu_read_unlock(); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 846d7d1..4fedb60 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -229,6 +229,14 @@ static inline int br_is_root_bridge(const struct net_bridge *br) return !memcmp(&br->bridge_id, &br->designated_root, 8); } +static inline struct net_bridge_port *br_port(const struct net_device *dev) +{ + if (!dev) + return NULL; + + return rcu_dereference(dev->br_port); +} + /* br_device.c */ extern void br_dev_setup(struct net_device *dev); extern netdev_tx_t br_dev_xmit(struct sk_buff *skb, diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 9be8fbc..4fa8377 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -25,7 +25,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) if (par->hooknum != NF_BR_BROUTING) memcpy(eth_hdr(skb)->h_dest, - par->in->br_port->br->dev->dev_addr, ETH_ALEN); + br_port(par->in)->br->dev->dev_addr, ETH_ALEN); else memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN); skb->pkt_type = PACKET_HOST; diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index f9560f3..32ca502 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ 
-183,7 +183,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, strcpy(pm->physindev, in->name); /* If in isn't a bridge, then physindev==indev */ if (in->br_port) - strcpy(pm->indev, in->br_port->br->dev->name); + strcpy(pm->indev, br_port(in)->br->dev->name); else strcpy(pm->indev, in->name); } else @@ -192,7 +192,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb, if (out) { /* If out exists, then out is a bridge port */ strcpy(pm->physoutdev, out->name); - strcpy(pm->outdev, out->br_port->br->dev->name); + strcpy(pm->outdev, br_port(out)->br->dev->name); } else pm->outdev[0] = pm->physoutdev[0] = '\0'; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index f0865fd..ac1361b 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -141,10 +141,10 @@ ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h, if (FWINV2(ebt_dev_check(e->out, out), EBT_IOUT)) return 1; if ((!in || !in->br_port) ? 0 : FWINV2(ebt_dev_check( - e->logical_in, in->br_port->br->dev), EBT_ILOGICALIN)) + e->logical_in, br_port(in)->br->dev), EBT_ILOGICALIN)) return 1; if ((!out || !out->br_port) ? 
0 : FWINV2(ebt_dev_check( - e->logical_out, out->br_port->br->dev), EBT_ILOGICALOUT)) + e->logical_out, br_port(out)->br->dev), EBT_ILOGICALOUT)) return 1; if (e->bitmask & EBT_SOURCEMAC) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index fe381d1..7d065cb 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -128,7 +128,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev); static int ipgre_net_id __read_mostly; struct ipgre_net { - struct ip_tunnel *tunnels[4][HASH_SIZE]; + struct ip_tunnel __rcu *tunnels[4][HASH_SIZE]; struct net_device *fb_tunnel_dev; }; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 0b27b14..7b5bc05 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -122,11 +122,11 @@ static int ipip_net_id __read_mostly; struct ipip_net { - struct ip_tunnel *tunnels_r_l[HASH_SIZE]; - struct ip_tunnel *tunnels_r[HASH_SIZE]; - struct ip_tunnel *tunnels_l[HASH_SIZE]; - struct ip_tunnel *tunnels_wc[1]; - struct ip_tunnel **tunnels[4]; + struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_wc[1]; + struct ip_tunnel __rcu **tunnels[4]; struct net_device *fb_tunnel_dev; }; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 4f8bddb..1263f2a 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -38,7 +38,7 @@ static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; #define MAX_IP_NAT_PROTO 256 -static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] +static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; static inline const struct nf_nat_protocol * diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 542f22f..ac2cf39 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -28,7 +28,7 @@ #include #include -const struct net_protocol 
*inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp; +const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp; static DEFINE_SPINLOCK(inet_proto_lock); /* diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cb562fd..c204b6e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -200,7 +200,7 @@ const __u8 ip_tos2prio[16] = { */ struct rt_hash_bucket { - struct rtable *chain; + struct rtable __rcu *chain; }; #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0f8caf6..2cb54fa 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3057,9 +3057,9 @@ static struct tcp_cookie_secret tcp_secret_one; static struct tcp_cookie_secret tcp_secret_two; /* Essentially a circular list, without dynamic allocation. */ -static struct tcp_cookie_secret *tcp_secret_generating; +static struct tcp_cookie_secret __rcu *tcp_secret_generating; static struct tcp_cookie_secret *tcp_secret_primary; -static struct tcp_cookie_secret *tcp_secret_retiring; +static struct tcp_cookie_secret __rcu *tcp_secret_retiring; static struct tcp_cookie_secret *tcp_secret_secondary; static DEFINE_SPINLOCK(tcp_secret_locker); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2599870..981ed03 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -83,9 +83,9 @@ struct ip6_tnl_net { /* the IPv6 tunnel fallback device */ struct net_device *fb_tnl_dev; /* lists for storing tunnels in use */ - struct ip6_tnl *tnls_r_l[HASH_SIZE]; - struct ip6_tnl *tnls_wc[1]; - struct ip6_tnl **tnls[2]; + struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; + struct ip6_tnl __rcu *tnls_wc[1]; + struct ip6_tnl __rcu **tnls[2]; }; /* diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 1fa3468..dee7e9d 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -25,7 +25,7 @@ #include #include -const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS]; +const struct inet6_protocol __rcu 
*inet6_protos[MAX_INET_PROTOS]; static DEFINE_SPINLOCK(inet6_proto_lock); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5abae10..dc77b54 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -68,11 +68,11 @@ static void ipip6_tunnel_setup(struct net_device *dev); static int sit_net_id __read_mostly; struct sit_net { - struct ip_tunnel *tunnels_r_l[HASH_SIZE]; - struct ip_tunnel *tunnels_r[HASH_SIZE]; - struct ip_tunnel *tunnels_l[HASH_SIZE]; - struct ip_tunnel *tunnels_wc[1]; - struct ip_tunnel **tunnels[4]; + struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_wc[1]; + struct ip_tunnel __rcu **tunnels[4]; struct net_device *fb_tunnel_dev; }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 241533e..08476d2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -192,7 +192,7 @@ struct beacon_data { }; struct ieee80211_if_ap { - struct beacon_data *beacon; + struct beacon_data __rcu *beacon; struct list_head vlans; @@ -214,7 +214,7 @@ struct ieee80211_if_vlan { struct list_head list; /* used for all tx if the VLAN is configured to 4-addr mode */ - struct sta_info *sta; + struct sta_info __rcu *sta; }; struct mesh_stats { @@ -388,7 +388,8 @@ struct ieee80211_if_ibss { unsigned long ibss_join_req; /* probe response/beacon for IBSS */ - struct sk_buff *presp, *skb; + struct sk_buff __rcu *presp; + struct sk_buff *skb; enum { IEEE80211_IBSS_MLME_SEARCH, @@ -492,9 +493,9 @@ struct ieee80211_sub_if_data { #define NUM_DEFAULT_KEYS 4 #define NUM_DEFAULT_MGMT_KEYS 2 - struct ieee80211_key *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; - struct ieee80211_key *default_key; - struct ieee80211_key *default_mgmt_key; + struct ieee80211_key __rcu *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; + struct ieee80211_key __rcu *default_key; + struct ieee80211_key __rcu *default_mgmt_key; u16 sequence_number; 
@@ -698,7 +699,7 @@ struct ieee80211_local { spinlock_t sta_lock; unsigned long num_sta; struct list_head sta_list, sta_pending_list; - struct sta_info *sta_hash[STA_HASH_SIZE]; + struct sta_info __rcu *sta_hash[STA_HASH_SIZE]; struct timer_list sta_cleanup; struct work_struct sta_finish_work; int sta_generation; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 822d845..8034e8d 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -229,10 +229,10 @@ struct sta_ampdu_mlme { struct sta_info { /* General information, mostly static */ struct list_head list; - struct sta_info *hnext; + struct sta_info __rcu *hnext; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; - struct ieee80211_key *key; + struct ieee80211_key __rcu *key; struct rate_control_ref *rate_ctrl; void *rate_ctrl_priv; spinlock_t lock; diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 78b505d..fdaec7d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -27,7 +27,7 @@ static DEFINE_MUTEX(afinfo_mutex); -const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; +const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL(nf_afinfo); int nf_register_afinfo(const struct nf_afinfo *afinfo) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index f516961..97619fc 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -26,10 +26,10 @@ static DEFINE_MUTEX(nf_ct_ecache_mutex); -struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; +struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); -struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; +struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly; EXPORT_SYMBOL_GPL(nf_expect_event_cb); /* deliver cached events and clear cache entry - must be called with locally diff --git 
a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index fdc8fb4..9a0f75f 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -16,7 +16,7 @@ #include #include -static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM]; +static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM]; static DEFINE_MUTEX(nf_ct_ext_type_mutex); void __nf_ct_ext_destroy(struct nf_conn *ct) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a44fa75..45b8091 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -30,8 +30,8 @@ #include #include -static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly; -struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly; +static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly; +struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_l3protos); static DEFINE_MUTEX(nf_ct_proto_mutex); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 015725a..64ba4d6 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,7 +16,7 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 -static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; +static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index c49ef21..e137c6b 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -17,7 +17,7 @@ * long term mutex. The handler must provide an an outfn() to accept packets * for queueing and must reinject all packets it receives, no matter what. 
*/ -static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly; +static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(queue_handler_mutex); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 203643f..d89c1be 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -404,7 +404,7 @@ __build_packet_message(struct nfulnl_instance *inst, htonl(indev->ifindex)); /* this is the bridge group "brX" */ NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_INDEV, - htonl(indev->br_port->br->dev->ifindex)); + htonl(br_port(indev)->br->dev->ifindex)); } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ @@ -431,7 +431,7 @@ __build_packet_message(struct nfulnl_instance *inst, htonl(outdev->ifindex)); /* this is the bridge group "brX" */ NLA_PUT_BE32(inst->skb, NFULA_IFINDEX_OUTDEV, - htonl(outdev->br_port->br->dev->ifindex)); + htonl(br_port(outdev)->br->dev->ifindex)); } else { /* Case 2: indev is a bridge group, we need to look * for physical device (when called from ipv4) */ diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index e70a6ef..0be9156 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -298,7 +298,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, htonl(indev->ifindex)); /* this is the bridge group "brX" */ NLA_PUT_BE32(skb, NFQA_IFINDEX_INDEV, - htonl(indev->br_port->br->dev->ifindex)); + htonl(br_port(indev)->br->dev->ifindex)); } else { /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ @@ -323,7 +323,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, htonl(outdev->ifindex)); /* this is the bridge group "brX" */ NLA_PUT_BE32(skb, NFQA_IFINDEX_OUTDEV, - htonl(outdev->br_port->br->dev->ifindex)); + htonl(br_port(outdev)->br->dev->ifindex)); } else { /* Case 
2: outdev is bridge group, we need to look for * physical output device (when called from ipv4) */ diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index d37b7f8..f6803cf 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -57,8 +57,8 @@ static DEFINE_SPINLOCK(netlbl_domhsh_lock); #define netlbl_domhsh_rcu_deref(p) \ rcu_dereference_check(p, rcu_read_lock_held() || \ lockdep_is_held(&netlbl_domhsh_lock)) -static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; -static struct netlbl_dom_map *netlbl_domhsh_def = NULL; +static struct netlbl_domhsh_tbl __rcu *netlbl_domhsh = NULL; +static struct netlbl_dom_map __rcu *netlbl_domhsh_def = NULL; /* * Domain Hash Table Helper Functions diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index a3d64aa..9c5bc42 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -118,8 +118,8 @@ static DEFINE_SPINLOCK(netlbl_unlhsh_lock); #define netlbl_unlhsh_rcu_deref(p) \ rcu_dereference_check(p, rcu_read_lock_held() || \ lockdep_is_held(&netlbl_unlhsh_lock)) -static struct netlbl_unlhsh_tbl *netlbl_unlhsh = NULL; -static struct netlbl_unlhsh_iface *netlbl_unlhsh_def = NULL; +static struct netlbl_unlhsh_tbl __rcu *netlbl_unlhsh = NULL; +static struct netlbl_unlhsh_iface __rcu *netlbl_unlhsh_def = NULL; /* Accept unlabeled packets flag */ static u8 netlabel_unlabel_acceptflg = 0; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7954243..b917d4a 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -119,7 +119,7 @@ struct nl_pid_hash { struct netlink_table { struct nl_pid_hash hash; struct hlist_head mc_list; - unsigned long *listeners; + unsigned long __rcu *listeners; unsigned int nl_nonroot; unsigned int groups; struct mutex *cb_mutex; diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 73aee7f..e3baaa1 100644 --- a/net/phonet/af_phonet.c +++ 
b/net/phonet/af_phonet.c @@ -35,7 +35,7 @@ #include /* Transport protocol registration */ -static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; +static struct phonet_protocol __rcu *proto_tab[PHONET_NPROTO] __read_mostly; static struct phonet_protocol *phonet_proto_get(int protocol) { diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 9b4ced6..d15b97e 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -36,7 +36,7 @@ struct phonet_routes { struct mutex lock; - struct net_device *table[64]; + struct net_device __rcu *table[64]; }; struct phonet_net { diff --git a/net/socket.c b/net/socket.c index 5e8d0af..18b3427 100644 --- a/net/socket.c +++ b/net/socket.c @@ -155,7 +155,7 @@ static const struct file_operations socket_file_ops = { */ static DEFINE_SPINLOCK(net_family_lock); -static const struct net_proto_family *net_families[NPROTO] __read_mostly; +static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; /* * Statistics counters of the socket lists diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 989fef8..bf4e3bc 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -288,7 +288,6 @@ static struct avc_node *avc_alloc_node(void) if (!node) goto out; - INIT_RCU_HEAD(&node->rhead); INIT_HLIST_NODE(&node->list); avc_cache_stats_incr(allocations); diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c index dc92792..65ebfe9 100644 --- a/security/selinux/netnode.c +++ b/security/selinux/netnode.c @@ -183,8 +183,6 @@ static void sel_netnode_insert(struct sel_netnode *node) BUG(); } - INIT_RCU_HEAD(&node->rcu); - /* we need to impose a limit on the growth of the hash table so check * this bucket to make sure it is within the specified bounds */ list_add_rcu(&node->list, &sel_netnode_hash[idx].list);