diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl
index 084f6ad..d7884b1 100644
--- a/Documentation/DocBook/kernel-locking.tmpl
+++ b/Documentation/DocBook/kernel-locking.tmpl
@@ -1645,7 +1645,9 @@ the amount of locking which needs to be done.
all the readers who were traversing the list when we deleted the
element are finished. We use call_rcu() to
register a callback which will actually destroy the object once
- the readers are finished.
+ all pre-existing readers are finished. Alternatively,
+ synchronize_rcu() may be used to block until
+ all pre-existing readers are finished.
But how does Read Copy Update know when the readers are
@@ -1714,7 +1716,7 @@ the amount of locking which needs to be done.
- object_put(obj);
+ list_del_rcu(&obj->list);
cache_num--;
-+ call_rcu(&obj->rcu, cache_delete_rcu, obj);
++ call_rcu(&obj->rcu, cache_delete_rcu);
}
/* Must be holding cache_lock */
@@ -1725,14 +1727,6 @@ the amount of locking which needs to be done.
if (++cache_num > MAX_CACHE_SIZE) {
struct object *i, *outcast = NULL;
list_for_each_entry(i, &cache, list) {
-@@ -85,6 +94,7 @@
- obj->popularity = 0;
- atomic_set(&obj->refcnt, 1); /* The cache holds a reference */
- spin_lock_init(&obj->lock);
-+ INIT_RCU_HEAD(&obj->rcu);
-
- spin_lock_irqsave(&cache_lock, flags);
- __cache_add(obj);
@@ -104,12 +114,11 @@
struct object *cache_find(int id)
{
@@ -1922,9 +1916,12 @@ machines due to caching.
mutex_lock()
- There is a mutex_trylock() which can be
- used inside interrupt context, as it will not sleep.
+ There is a mutex_trylock() which does not
+ sleep. Still, it must not be used inside interrupt context since
+ its implementation is not safe for that.
mutex_unlock() will also never sleep.
+ It cannot be used in interrupt context either since a mutex
+ must be released by the same task that acquired it.
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 790d1a8..0c134f8 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -218,13 +218,22 @@ over a rather long period of time, but improvements are always welcome!
include:
a. Keeping a count of the number of data-structure elements
- used by the RCU-protected data structure, including those
- waiting for a grace period to elapse. Enforce a limit
- on this number, stalling updates as needed to allow
- previously deferred frees to complete.
-
- Alternatively, limit only the number awaiting deferred
- free rather than the total number of elements.
+ used by the RCU-protected data structure, including
+ those waiting for a grace period to elapse. Enforce a
+ limit on this number, stalling updates as needed to allow
+ previously deferred frees to complete. Alternatively,
+ limit only the number awaiting deferred free rather than
+ the total number of elements.
+
+ One way to stall the updates is to acquire the update-side
+ mutex. (Don't try this with a spinlock -- other CPUs
+ spinning on the lock could prevent the grace period
+ from ever ending.) Another way to stall the updates
+ is for the updates to use a wrapper function around
+ the memory allocator, so that this wrapper function
+ simulates OOM when there is too much memory awaiting an
+ RCU grace period. There are of course many other
+ variations on this theme.
b. Limiting update rate. For example, if updates occur only
once per hour, then no explicit rate limiting is required,
@@ -365,3 +374,26 @@ over a rather long period of time, but improvements are always welcome!
and the compiler to freely reorder code into and out of RCU
read-side critical sections. It is the responsibility of the
RCU update-side primitives to deal with this.
+
+17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and
+ the __rcu sparse checks to validate your RCU code. These
+ can help find problems as follows:
+
+ CONFIG_PROVE_RCU: check that accesses to RCU-protected data
+ structures are carried out under the proper RCU
+ read-side critical section, while holding the right
+ combination of locks, or whatever other conditions
+ are appropriate.
+
+ CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the
+ same object to call_rcu() (or friends) before an RCU
+ grace period has elapsed since the last time that you
+ passed that same object to call_rcu() (or friends).
+
+ __rcu sparse checks: tag the pointer to the RCU-protected data
+ structure with __rcu, and sparse will warn you if you
+ access that pointer without the services of one of the
+ variants of rcu_dereference().
+
+ These debugging aids can help you find problems that are
+ otherwise extremely difficult to spot.
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 44c6dcc..862c08e 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -80,6 +80,24 @@ o A CPU looping with bottom halves disabled. This condition can
o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel
without invoking schedule().
+o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
+ happen to preempt a low-priority task in the middle of an RCU
+ read-side critical section. This is especially damaging if
+ that low-priority task is not permitted to run on any other CPU,
+ in which case the next RCU grace period can never complete, which
+ will eventually cause the system to run out of memory and hang.
+ While the system is in the process of running itself out of
+ memory, you might see stall-warning messages.
+
+o A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that
+ is running at a higher priority than the RCU softirq threads.
+ This will prevent RCU callbacks from ever being invoked,
+ and in a CONFIG_TREE_PREEMPT_RCU kernel will further prevent
+ RCU grace periods from ever completing. Either way, the
+ system will eventually run out of memory and hang. In the
+ CONFIG_TREE_PREEMPT_RCU case, you might see stall-warning
+ messages.
+
o A bug in the RCU implementation.
o A hardware failure. This is quite unlikely, but has occurred
diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg
index 1a07fd6..a7952c2 100644
--- a/Documentation/hwmon/f71882fg
+++ b/Documentation/hwmon/f71882fg
@@ -2,10 +2,6 @@ Kernel driver f71882fg
======================
Supported chips:
- * Fintek F71808E
- Prefix: 'f71808fg'
- Addresses scanned: none, address read from Super I/O config space
- Datasheet: Not public
* Fintek F71858FG
Prefix: 'f71858fg'
Addresses scanned: none, address read from Super I/O config space
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index f6f8025..1565eef 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -1024,6 +1024,10 @@ ThinkPad-specific interface. The driver will disable its native
backlight brightness control interface if it detects that the standard
ACPI interface is available in the ThinkPad.
+If you want to use the thinkpad-acpi backlight brightness control
+instead of the generic ACPI video backlight brightness control for some
+reason, you should use the acpi_backlight=vendor kernel parameter.
+
The brightness_enable module parameter can be used to control whether
the LCD brightness control feature will be enabled when available.
brightness_enable=0 forces it to be disabled. brightness_enable=1
diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt
index 568fa08..302db5d 100644
--- a/Documentation/powerpc/booting-without-of.txt
+++ b/Documentation/powerpc/booting-without-of.txt
@@ -49,40 +49,13 @@ Table of Contents
f) MDIO on GPIOs
g) SPI busses
- VII - Marvell Discovery mv64[345]6x System Controller chips
- 1) The /system-controller node
- 2) Child nodes of /system-controller
- a) Marvell Discovery MDIO bus
- b) Marvell Discovery ethernet controller
- c) Marvell Discovery PHY nodes
- d) Marvell Discovery SDMA nodes
- e) Marvell Discovery BRG nodes
- f) Marvell Discovery CUNIT nodes
- g) Marvell Discovery MPSCROUTING nodes
- h) Marvell Discovery MPSCINTR nodes
- i) Marvell Discovery MPSC nodes
- j) Marvell Discovery Watch Dog Timer nodes
- k) Marvell Discovery I2C nodes
- l) Marvell Discovery PIC (Programmable Interrupt Controller) nodes
- m) Marvell Discovery MPP (Multipurpose Pins) multiplexing nodes
- n) Marvell Discovery GPP (General Purpose Pins) nodes
- o) Marvell Discovery PCI host bridge node
- p) Marvell Discovery CPU Error nodes
- q) Marvell Discovery SRAM Controller nodes
- r) Marvell Discovery PCI Error Handler nodes
- s) Marvell Discovery Memory Controller nodes
-
- VIII - Specifying interrupt information for devices
+ VII - Specifying interrupt information for devices
1) interrupts property
2) interrupt-parent property
3) OpenPIC Interrupt Controllers
4) ISA Interrupt Controllers
- IX - Specifying GPIO information for devices
- 1) gpios property
- 2) gpio-controller nodes
-
- X - Specifying device power management information (sleep property)
+ VIII - Specifying device power management information (sleep property)
Appendix A - Sample SOC node for MPC8540
diff --git a/Documentation/powerpc/hvcs.txt b/Documentation/powerpc/hvcs.txt
index f93462c..6d8be34 100644
--- a/Documentation/powerpc/hvcs.txt
+++ b/Documentation/powerpc/hvcs.txt
@@ -560,7 +560,7 @@ The proper channel for reporting bugs is either through the Linux OS
distribution company that provided your OS or by posting issues to the
PowerPC development mailing list at:
-linuxppc-dev@ozlabs.org
+linuxppc-dev@lists.ozlabs.org
This request is to provide a documented and searchable public exchange
of the problems and solutions surrounding this driver for the benefit of
diff --git a/Kbuild b/Kbuild
index e3737ad..bce37cb 100644
--- a/Kbuild
+++ b/Kbuild
@@ -3,7 +3,19 @@
# This file takes care of the following:
# 1) Generate bounds.h
# 2) Generate asm-offsets.h (may need bounds.h)
-# 3) Check for missing system calls
+# 3) Generate kernel-offsets.h
+# 4) Check for missing system calls
+
+# Default sed regexp - multiline due to syntax constraints
+define sed-y
+ "/^->/{s:->#\(.*\):/* \1 */:; \
+ s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
+ s:->::; p;}"
+endef
+
+quiet_cmd_offsets_cc_s_c = CC $(quiet_modtag) $@
+cmd_offsets_cc_s_c = $(CC) -D__GENARATING_OFFSETS__ \
+ $(c_flags) -fverbose-asm -S -o $@ $<
#####
# 1) Generate bounds.h
@@ -43,22 +55,14 @@ $(obj)/$(bounds-file): kernel/bounds.s Kbuild
# 2) Generate asm-offsets.h
#
-offsets-file := include/generated/asm-offsets.h
+asm-offsets-file := include/generated/asm-offsets.h
-always += $(offsets-file)
-targets += $(offsets-file)
+always += $(asm-offsets-file)
+targets += $(asm-offsets-file)
targets += arch/$(SRCARCH)/kernel/asm-offsets.s
-
-# Default sed regexp - multiline due to syntax constraints
-define sed-y
- "/^->/{s:->#\(.*\):/* \1 */:; \
- s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; \
- s:->::; p;}"
-endef
-
-quiet_cmd_offsets = GEN $@
-define cmd_offsets
+quiet_cmd_asm_offsets = GEN $@
+define cmd_asm_offsets
(set -e; \
echo "#ifndef __ASM_OFFSETS_H__"; \
echo "#define __ASM_OFFSETS_H__"; \
@@ -78,13 +82,48 @@ endef
arch/$(SRCARCH)/kernel/asm-offsets.s: arch/$(SRCARCH)/kernel/asm-offsets.c \
$(obj)/$(bounds-file) FORCE
$(Q)mkdir -p $(dir $@)
- $(call if_changed_dep,cc_s_c)
+ $(call if_changed_dep,offsets_cc_s_c)
+
+$(obj)/$(asm-offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s Kbuild
+ $(call cmd,asm_offsets)
+
+#####
+# 3) Generate kernel-offsets.h
+#
+
+kernel-offsets-file := include/generated/kernel-offsets.h
+
+always += $(kernel-offsets-file)
+targets += $(kernel-offsets-file)
+targets += kernel/kernel-offsets.s
+
+quiet_cmd_kernel_offsets = GEN $@
+define cmd_kernel_offsets
+ (set -e; \
+ echo "#ifndef __LINUX_KERNEL_OFFSETS_H__"; \
+ echo "#define __LINUX_KERNEL_OFFSETS_H__"; \
+ echo "/*"; \
+ echo " * DO NOT MODIFY."; \
+ echo " *"; \
+ echo " * This file was generated by Kbuild"; \
+ echo " *"; \
+ echo " */"; \
+ echo ""; \
+ sed -ne $(sed-y) $<; \
+ echo ""; \
+ echo "#endif" ) > $@
+endef
+
+# We use internal kbuild rules to avoid the "is up to date" message from make
+kernel/kernel-offsets.s: kernel/kernel-offsets.c $(obj)/$(bounds-file) \
+ $(obj)/$(asm-offsets-file) FORCE
+ $(call if_changed_dep,offsets_cc_s_c)
-$(obj)/$(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s Kbuild
- $(call cmd,offsets)
+$(obj)/$(kernel-offsets-file): kernel/kernel-offsets.s Kbuild
+ $(call cmd,kernel_offsets)
#####
-# 3) Check for missing system calls
+# 4) Check for missing system calls
#
quiet_cmd_syscalls = CALL $<
diff --git a/MAINTAINERS b/MAINTAINERS
index b5b8baa..433f353 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -456,7 +456,7 @@ F: drivers/infiniband/hw/amso1100/
AOA (Apple Onboard Audio) ALSA DRIVER
M: Johannes Berg
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Maintained
F: sound/aoa/
@@ -1472,8 +1472,8 @@ F: include/linux/can/platform/
CELL BROADBAND ENGINE ARCHITECTURE
M: Arnd Bergmann
-L: linuxppc-dev@ozlabs.org
-L: cbe-oss-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
+L: cbe-oss-dev@lists.ozlabs.org
W: http://www.ibm.com/developerworks/power/cell/
S: Supported
F: arch/powerpc/include/asm/cell*.h
@@ -2371,13 +2371,13 @@ F: include/linux/fb.h
FREESCALE DMA DRIVER
M: Li Yang
M: Zhang Wei
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: drivers/dma/fsldma.*
FREESCALE I2C CPM DRIVER
M: Jochen Friedrich
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
L: linux-i2c@vger.kernel.org
S: Maintained
F: drivers/i2c/busses/i2c-cpm.c
@@ -2393,7 +2393,7 @@ F: drivers/video/imxfb.c
FREESCALE SOC FS_ENET DRIVER
M: Pantelis Antoniou
M: Vitaly Bordug
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/fs_enet/
@@ -2401,7 +2401,7 @@ F: include/linux/fs_enet_pd.h
FREESCALE QUICC ENGINE LIBRARY
M: Timur Tabi
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
S: Supported
F: arch/powerpc/sysdev/qe_lib/
F: arch/powerpc/include/asm/*qe.h
@@ -2409,27 +2409,27 @@ F: arch/powerpc/include/asm/*qe.h
FREESCALE USB PERIPHERAL DRIVERS
M: Li Yang
L: linux-usb@vger.kernel.org
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: drivers/usb/gadget/fsl*
FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
M: Li Yang
L: netdev@vger.kernel.org
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: drivers/net/ucc_geth*
FREESCALE QUICC ENGINE UCC UART DRIVER
M: Timur Tabi
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
S: Supported
F: drivers/serial/ucc_uart.c
FREESCALE SOC SOUND DRIVERS
M: Timur Tabi
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
S: Supported
F: sound/soc/fsl/fsl*
F: sound/soc/fsl/mpc8610_hpcd.c
@@ -2564,7 +2564,7 @@ F: mm/memory-failure.c
F: mm/hwpoison-inject.c
HYPERVISOR VIRTUAL CONSOLE DRIVER
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
S: Odd Fixes
F: drivers/char/hvc_*
@@ -3476,7 +3476,7 @@ F: drivers/usb/misc/legousbtower.c
LGUEST
M: Rusty Russell
-L: lguest@ozlabs.org
+L: lguest@lists.ozlabs.org
W: http://lguest.ozlabs.org/
S: Maintained
F: Documentation/lguest/
@@ -3495,7 +3495,7 @@ LINUX FOR POWERPC (32-BIT AND 64-BIT)
M: Benjamin Herrenschmidt
M: Paul Mackerras
W: http://www.penguinppc.org/
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
Q: http://patchwork.ozlabs.org/project/linuxppc-dev/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git
S: Supported
@@ -3505,14 +3505,14 @@ F: arch/powerpc/
LINUX FOR POWER MACINTOSH
M: Benjamin Herrenschmidt
W: http://www.penguinppc.org/
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: arch/powerpc/platforms/powermac/
F: drivers/macintosh/
LINUX FOR POWERPC EMBEDDED MPC5XXX
M: Grant Likely
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
T: git git://git.secretlab.ca/git/linux-2.6.git
S: Maintained
F: arch/powerpc/platforms/512x/
@@ -3522,7 +3522,7 @@ LINUX FOR POWERPC EMBEDDED PPC4XX
M: Josh Boyer
M: Matt Porter
W: http://www.penguinppc.org/
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jwboyer/powerpc-4xx.git
S: Maintained
F: arch/powerpc/platforms/40x/
@@ -3531,7 +3531,7 @@ F: arch/powerpc/platforms/44x/
LINUX FOR POWERPC EMBEDDED XILINX VIRTEX
M: Grant Likely
W: http://wiki.secretlab.ca/index.php/Linux_on_Xilinx_Virtex
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
T: git git://git.secretlab.ca/git/linux-2.6.git
S: Maintained
F: arch/powerpc/*/*virtex*
@@ -3541,20 +3541,20 @@ LINUX FOR POWERPC EMBEDDED PPC8XX
M: Vitaly Bordug
M: Marcelo Tosatti
W: http://www.penguinppc.org/
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: arch/powerpc/platforms/8xx/
LINUX FOR POWERPC EMBEDDED PPC83XX AND PPC85XX
M: Kumar Gala
W: http://www.penguinppc.org/
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: arch/powerpc/platforms/83xx/
LINUX FOR POWERPC PA SEMI PWRFICIENT
M: Olof Johansson
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
S: Maintained
F: arch/powerpc/platforms/pasemi/
F: drivers/*/*pasemi*
@@ -4601,14 +4601,14 @@ F: drivers/ata/sata_promise.*
PS3 NETWORK SUPPORT
M: Geoff Levand
L: netdev@vger.kernel.org
-L: cbe-oss-dev@ozlabs.org
+L: cbe-oss-dev@lists.ozlabs.org
S: Maintained
F: drivers/net/ps3_gelic_net.*
PS3 PLATFORM SUPPORT
M: Geoff Levand
-L: linuxppc-dev@ozlabs.org
-L: cbe-oss-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
+L: cbe-oss-dev@lists.ozlabs.org
S: Maintained
F: arch/powerpc/boot/ps3*
F: arch/powerpc/include/asm/lv1call.h
@@ -4622,7 +4622,7 @@ F: sound/ppc/snd_ps3*
PS3VRAM DRIVER
M: Jim Paris
-L: cbe-oss-dev@ozlabs.org
+L: cbe-oss-dev@lists.ozlabs.org
S: Maintained
F: drivers/block/ps3vram.c
@@ -5068,7 +5068,7 @@ F: drivers/mmc/host/sdhci.*
SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF)
M: Anton Vorontsov
-L: linuxppc-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
L: linux-mmc@vger.kernel.org
S: Maintained
F: drivers/mmc/host/sdhci-of.*
@@ -5485,8 +5485,8 @@ F: drivers/net/spider_net*
SPU FILE SYSTEM
M: Jeremy Kerr
-L: linuxppc-dev@ozlabs.org
-L: cbe-oss-dev@ozlabs.org
+L: linuxppc-dev@lists.ozlabs.org
+L: cbe-oss-dev@lists.ozlabs.org
W: http://www.ibm.com/developerworks/power/cell/
S: Supported
F: Documentation/filesystems/spufs.txt
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 88e608a..842dba3 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -387,8 +387,9 @@ EXPORT_SYMBOL(dump_elf_task_fp);
* sys_execve() executes a new program.
*/
asmlinkage int
-do_sys_execve(const char __user *ufilename, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs *regs)
+do_sys_execve(const char __user *ufilename,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp, struct pt_regs *regs)
{
int error;
char *filename;
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 99b8200..59c1ce8 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -21,6 +21,9 @@ GZFLAGS :=-9
# Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb:
KBUILD_CFLAGS +=$(call cc-option,-marm,)
+# Never generate .eh_frame
+KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm)
+
# Do not use arch/arm/defconfig - it's always outdated.
# Select a platform tht is kept up-to-date
KBUILD_DEFCONFIG := versatile_defconfig
diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h
index c974be8..7ce15eb 100644
--- a/arch/arm/include/asm/ptrace.h
+++ b/arch/arm/include/asm/ptrace.h
@@ -158,15 +158,24 @@ struct pt_regs {
*/
static inline int valid_user_regs(struct pt_regs *regs)
{
- if (user_mode(regs) && (regs->ARM_cpsr & PSR_I_BIT) == 0) {
- regs->ARM_cpsr &= ~(PSR_F_BIT | PSR_A_BIT);
- return 1;
+ unsigned long mode = regs->ARM_cpsr & MODE_MASK;
+
+ /*
+ * Always clear the F (FIQ) and A (delayed abort) bits
+ */
+ regs->ARM_cpsr &= ~(PSR_F_BIT | PSR_A_BIT);
+
+ if ((regs->ARM_cpsr & PSR_I_BIT) == 0) {
+ if (mode == USR_MODE)
+ return 1;
+ if (elf_hwcap & HWCAP_26BIT && mode == USR26_MODE)
+ return 1;
}
/*
* Force CPSR to something logical...
*/
- regs->ARM_cpsr &= PSR_f | PSR_s | (PSR_x & ~PSR_A_BIT) | PSR_T_BIT | MODE32_BIT;
+ regs->ARM_cpsr &= PSR_f | PSR_s | PSR_x | PSR_T_BIT | MODE32_BIT;
if (!(elf_hwcap & HWCAP_26BIT))
regs->ARM_cpsr |= USR_MODE;
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index dd2bf53..d02cfb6 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -392,6 +392,7 @@
#define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363)
#define __NR_perf_event_open (__NR_SYSCALL_BASE+364)
#define __NR_recvmmsg (__NR_SYSCALL_BASE+365)
+#define __NR_accept4 (__NR_SYSCALL_BASE+366)
/*
* The following SWIs are ARM private.
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 37ae301..afeb71f 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -375,6 +375,7 @@
CALL(sys_rt_tgsigqueueinfo)
CALL(sys_perf_event_open)
/* 365 */ CALL(sys_recvmmsg)
+ CALL(sys_accept4)
#ifndef syscalls_counted
.equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
#define syscalls_counted
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index 778c2f7..d6e8b4d 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -79,7 +79,7 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
return;
/* Initialize to zero */
- for (regno = 0; regno < GDB_MAX_REGS; regno++)
+ for (regno = 0; regno < DBG_MAX_REG_NUM; regno++)
gdb_regs[regno] = 0;
/* Otherwise, we have only some registers from switch_to() */
diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
index 5b7c541..62e7c61 100644
--- a/arch/arm/kernel/sys_arm.c
+++ b/arch/arm/kernel/sys_arm.c
@@ -62,8 +62,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs)
/* sys_execve() executes a new program.
* This is called indirectly via a small wrapper
*/
-asmlinkage int sys_execve(const char __user *filenamei, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs *regs)
+asmlinkage int sys_execve(const char __user *filenamei,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp, struct pt_regs *regs)
{
int error;
char * filename;
@@ -78,14 +79,17 @@ out:
return error;
}
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
struct pt_regs regs;
int ret;
memset(&regs, 0, sizeof(struct pt_regs));
- ret = do_execve(filename, (char __user * __user *)argv,
- (char __user * __user *)envp, &regs);
+ ret = do_execve(filename,
+ (const char __user *const __user *)argv,
+ (const char __user *const __user *)envp, &regs);
if (ret < 0)
goto out;
diff --git a/arch/arm/plat-samsung/dev-hsmmc.c b/arch/arm/plat-samsung/dev-hsmmc.c
index b0f93f1..9d2be09 100644
--- a/arch/arm/plat-samsung/dev-hsmmc.c
+++ b/arch/arm/plat-samsung/dev-hsmmc.c
@@ -70,4 +70,6 @@ void s3c_sdhci0_set_platdata(struct s3c_sdhci_platdata *pd)
set->cfg_gpio = pd->cfg_gpio;
if (pd->cfg_card)
set->cfg_card = pd->cfg_card;
+ if (pd->host_caps)
+ set->host_caps = pd->host_caps;
}
diff --git a/arch/arm/plat-samsung/dev-hsmmc1.c b/arch/arm/plat-samsung/dev-hsmmc1.c
index 1504fd8..a6c8295 100644
--- a/arch/arm/plat-samsung/dev-hsmmc1.c
+++ b/arch/arm/plat-samsung/dev-hsmmc1.c
@@ -70,4 +70,6 @@ void s3c_sdhci1_set_platdata(struct s3c_sdhci_platdata *pd)
set->cfg_gpio = pd->cfg_gpio;
if (pd->cfg_card)
set->cfg_card = pd->cfg_card;
+ if (pd->host_caps)
+ set->host_caps = pd->host_caps;
}
diff --git a/arch/arm/plat-samsung/dev-hsmmc2.c b/arch/arm/plat-samsung/dev-hsmmc2.c
index b28ef17..cb0d714 100644
--- a/arch/arm/plat-samsung/dev-hsmmc2.c
+++ b/arch/arm/plat-samsung/dev-hsmmc2.c
@@ -71,4 +71,6 @@ void s3c_sdhci2_set_platdata(struct s3c_sdhci_platdata *pd)
set->cfg_gpio = pd->cfg_gpio;
if (pd->cfg_card)
set->cfg_card = pd->cfg_card;
+ if (pd->host_caps)
+ set->host_caps = pd->host_caps;
}
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index e5daddf..9c46aaa 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -384,8 +384,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs)
}
asmlinkage int sys_execve(const char __user *ufilename,
- char __user *__user *uargv,
- char __user *__user *uenvp, struct pt_regs *regs)
+ const char __user *const __user *uargv,
+ const char __user *const __user *uenvp,
+ struct pt_regs *regs)
{
int error;
char *filename;
diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c
index 459349b..62635a0 100644
--- a/arch/avr32/kernel/sys_avr32.c
+++ b/arch/avr32/kernel/sys_avr32.c
@@ -7,7 +7,9 @@
*/
#include
-int kernel_execve(const char *file, char **argv, char **envp)
+int kernel_execve(const char *file,
+ const char *const *argv,
+ const char *const *envp)
{
register long scno asm("r8") = __NR_execve;
register long sc1 asm("r12") = (long)file;
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index a566f61..01f98cb 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -209,7 +209,9 @@ copy_thread(unsigned long clone_flags,
/*
* sys_execve() executes a new program.
*/
-asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp)
+asmlinkage int sys_execve(const char __user *name,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp)
{
int error;
char *filename;
diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c
index 93f0f64..9a57db6 100644
--- a/arch/cris/arch-v10/kernel/process.c
+++ b/arch/cris/arch-v10/kernel/process.c
@@ -204,7 +204,9 @@ asmlinkage int sys_vfork(long r10, long r11, long r12, long r13, long mof, long
/*
* sys_execve() executes a new program.
*/
-asmlinkage int sys_execve(const char *fname, char **argv, char **envp,
+asmlinkage int sys_execve(const char *fname,
+ const char *const *argv,
+ const char *const *envp,
long r13, long mof, long srp,
struct pt_regs *regs)
{
diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c
index 2661a95..562f847 100644
--- a/arch/cris/arch-v32/kernel/process.c
+++ b/arch/cris/arch-v32/kernel/process.c
@@ -218,8 +218,10 @@ sys_vfork(long r10, long r11, long r12, long r13, long mof, long srp,
/* sys_execve() executes a new program. */
asmlinkage int
-sys_execve(const char *fname, char **argv, char **envp, long r13, long mof, long srp,
- struct pt_regs *regs)
+sys_execve(const char *fname,
+ const char *const *argv,
+ const char *const *envp, long r13, long mof, long srp,
+ struct pt_regs *regs)
{
int error;
char *filename;
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 428931c..2b63b01 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -250,8 +250,9 @@ int copy_thread(unsigned long clone_flags,
/*
* sys_execve() executes a new program.
*/
-asmlinkage int sys_execve(const char __user *name, char __user * __user *argv,
- char __user * __user *envp)
+asmlinkage int sys_execve(const char __user *name,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp)
{
int error;
char * filename;
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index 8b7b78d..9747813 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -212,7 +212,10 @@ int copy_thread(unsigned long clone_flags,
/*
* sys_execve() executes a new program.
*/
-asmlinkage int sys_execve(const char *name, char **argv, char **envp,int dummy,...)
+asmlinkage int sys_execve(const char *name,
+ const char *const *argv,
+ const char *const *envp,
+ int dummy, ...)
{
int error;
char * filename;
diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c
index f9b3f44..dc1ac02 100644
--- a/arch/h8300/kernel/sys_h8300.c
+++ b/arch/h8300/kernel/sys_h8300.c
@@ -51,7 +51,9 @@ asmlinkage void syscall_print(void *dummy,...)
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
register long res __asm__("er0");
register char *const *_c __asm__("er3") = envp;
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 87f1bd1..954d398 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -356,8 +356,6 @@ asmlinkage unsigned long sys_mmap2(
int fd, long pgoff);
struct pt_regs;
struct sigaction;
-long sys_execve(const char __user *filename, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs *regs);
asmlinkage long sys_ia64_pipe(void);
asmlinkage long sys_rt_sigaction(int sig,
const struct sigaction __user *act,
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index a879c03..16f1c7b 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -633,7 +633,9 @@ dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
}
long
-sys_execve (const char __user *filename, char __user * __user *argv, char __user * __user *envp,
+sys_execve (const char __user *filename,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp,
struct pt_regs *regs)
{
char *fname;
diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c
index 8665a4d..422bea9 100644
--- a/arch/m32r/kernel/process.c
+++ b/arch/m32r/kernel/process.c
@@ -289,8 +289,8 @@ asmlinkage int sys_vfork(unsigned long r0, unsigned long r1, unsigned long r2,
* sys_execve() executes a new program.
*/
asmlinkage int sys_execve(const char __user *ufilename,
- char __user * __user *uargv,
- char __user * __user *uenvp,
+ const char __user *const __user *uargv,
+ const char __user *const __user *uenvp,
unsigned long r3, unsigned long r4, unsigned long r5,
unsigned long r6, struct pt_regs regs)
{
diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c
index 0a00f46..d841fb6 100644
--- a/arch/m32r/kernel/sys_m32r.c
+++ b/arch/m32r/kernel/sys_m32r.c
@@ -93,7 +93,9 @@ asmlinkage int sys_cachectl(char *addr, int nbytes, int op)
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
register long __scno __asm__ ("r7") = __NR_execve;
register long __arg3 __asm__ ("r2") = (long)(envp);
diff --git a/arch/m68k/include/asm/ide.h b/arch/m68k/include/asm/ide.h
index 3958726..492fee8 100644
--- a/arch/m68k/include/asm/ide.h
+++ b/arch/m68k/include/asm/ide.h
@@ -1,6 +1,4 @@
/*
- * linux/include/asm-m68k/ide.h
- *
* Copyright (C) 1994-1996 Linus Torvalds & authors
*/
@@ -34,6 +32,8 @@
#include
#include
+#ifdef CONFIG_MMU
+
/*
* Get rid of defs from io.h - ide has its private and conflicting versions
* Since so far no single m68k platform uses ISA/PCI I/O space for IDE, we
@@ -53,5 +53,14 @@
#define __ide_mm_outsw(port, addr, n) raw_outsw((u16 *)port, addr, n)
#define __ide_mm_outsl(port, addr, n) raw_outsl((u32 *)port, addr, n)
+#else
+
+#define __ide_mm_insw(port, addr, n) io_insw((unsigned int)port, addr, n)
+#define __ide_mm_insl(port, addr, n) io_insl((unsigned int)port, addr, n)
+#define __ide_mm_outsw(port, addr, n) io_outsw((unsigned int)port, addr, n)
+#define __ide_mm_outsl(port, addr, n) io_outsl((unsigned int)port, addr, n)
+
+#endif /* CONFIG_MMU */
+
#endif /* __KERNEL__ */
#endif /* _M68K_IDE_H */
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 221d0b7..18732ab 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -315,7 +315,9 @@ EXPORT_SYMBOL(dump_fpu);
/*
* sys_execve() executes a new program.
*/
-asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp)
+asmlinkage int sys_execve(const char __user *name,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp)
{
int error;
char * filename;
diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
index 7789669..2f431ec 100644
--- a/arch/m68k/kernel/sys_m68k.c
+++ b/arch/m68k/kernel/sys_m68k.c
@@ -459,7 +459,9 @@ asmlinkage int sys_getpagesize(void)
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
register long __res asm ("%d0") = __NR_execve;
register long __a asm ("%d1") = (long)(filename);
diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c
index 6350f68..6d33905 100644
--- a/arch/m68knommu/kernel/process.c
+++ b/arch/m68knommu/kernel/process.c
@@ -316,14 +316,14 @@ void dump(struct pt_regs *fp)
fp->d0, fp->d1, fp->d2, fp->d3);
printk(KERN_EMERG "d4: %08lx d5: %08lx a0: %08lx a1: %08lx\n",
fp->d4, fp->d5, fp->a0, fp->a1);
- printk(KERN_EMERG "\nUSP: %08x TRAPFRAME: %08x\n",
- (unsigned int) rdusp(), (unsigned int) fp);
+ printk(KERN_EMERG "\nUSP: %08x TRAPFRAME: %p\n",
+ (unsigned int) rdusp(), fp);
printk(KERN_EMERG "\nCODE:");
tp = ((unsigned char *) fp->pc) - 0x20;
for (sp = (unsigned long *) tp, i = 0; (i < 0x40); i += 4) {
if ((i % 0x10) == 0)
- printk(KERN_EMERG "%08x: ", (int) (tp + i));
+ printk(KERN_EMERG "%p: ", tp + i);
printk("%08x ", (int) *sp++);
}
printk(KERN_EMERG "\n");
@@ -332,7 +332,7 @@ void dump(struct pt_regs *fp)
tp = ((unsigned char *) fp) - 0x40;
for (sp = (unsigned long *) tp, i = 0; (i < 0xc0); i += 4) {
if ((i % 0x10) == 0)
- printk(KERN_EMERG "%08x: ", (int) (tp + i));
+ printk(KERN_EMERG "%p: ", tp + i);
printk("%08x ", (int) *sp++);
}
printk(KERN_EMERG "\n");
@@ -341,7 +341,7 @@ void dump(struct pt_regs *fp)
tp = (unsigned char *) (rdusp() - 0x10);
for (sp = (unsigned long *) tp, i = 0; (i < 0x80); i += 4) {
if ((i % 0x10) == 0)
- printk(KERN_EMERG "%08x: ", (int) (tp + i));
+ printk(KERN_EMERG "%p: ", tp + i);
printk("%08x ", (int) *sp++);
}
printk(KERN_EMERG "\n");
@@ -350,7 +350,9 @@ void dump(struct pt_regs *fp)
/*
* sys_execve() executes a new program.
*/
-asmlinkage int sys_execve(const char *name, char **argv, char **envp)
+asmlinkage int sys_execve(const char *name,
+ const char *const *argv,
+ const char *const *envp)
{
int error;
char * filename;
diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c
index d65e9c4..68488ae 100644
--- a/arch/m68knommu/kernel/sys_m68k.c
+++ b/arch/m68knommu/kernel/sys_m68k.c
@@ -44,7 +44,9 @@ asmlinkage int sys_getpagesize(void)
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
register long __res asm ("%d0") = __NR_execve;
register long __a asm ("%d1") = (long)(filename);
diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c
index d33ba17..99d9b61 100644
--- a/arch/microblaze/kernel/prom_parse.c
+++ b/arch/microblaze/kernel/prom_parse.c
@@ -73,7 +73,7 @@ int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq)
/* We can only get here if we hit a P2P bridge with no node,
* let's do standard swizzling and try again
*/
- lspec = of_irq_pci_swizzle(PCI_SLOT(pdev->devfn), lspec);
+ lspec = pci_swizzle_interrupt_pin(pdev, lspec);
pdev = ppdev;
}
diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c
index 6abab6e..2250fe9 100644
--- a/arch/microblaze/kernel/sys_microblaze.c
+++ b/arch/microblaze/kernel/sys_microblaze.c
@@ -47,8 +47,10 @@ asmlinkage long microblaze_clone(int flags, unsigned long stack, struct pt_regs
return do_fork(flags, stack, regs, 0, NULL, NULL);
}
-asmlinkage long microblaze_execve(const char __user *filenamei, char __user *__user *argv,
- char __user *__user *envp, struct pt_regs *regs)
+asmlinkage long microblaze_execve(const char __user *filenamei,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp,
+ struct pt_regs *regs)
{
int error;
char *filename;
@@ -77,7 +79,9 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len,
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
register const char *__a __asm__("r5") = filename;
register const void *__b __asm__("r6") = argv;
diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c
index 23be25f..55ef532 100644
--- a/arch/microblaze/pci/pci-common.c
+++ b/arch/microblaze/pci/pci-common.c
@@ -27,10 +27,11 @@
#include
#include
#include
+#include
+#include
#include
#include
-#include
#include
#include
@@ -1077,7 +1078,7 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus)
struct dev_archdata *sd = &dev->dev.archdata;
/* Setup OF node pointer in archdata */
- sd->of_node = pci_device_to_OF_node(dev);
+ dev->dev.of_node = pci_device_to_OF_node(dev);
/* Fixup NUMA node as it may not be setup yet by the generic
* code and is needed by the DMA init
diff --git a/arch/microblaze/pci/xilinx_pci.c b/arch/microblaze/pci/xilinx_pci.c
index 7869a41..0687a42 100644
--- a/arch/microblaze/pci/xilinx_pci.c
+++ b/arch/microblaze/pci/xilinx_pci.c
@@ -16,6 +16,7 @@
#include
#include
+#include
#include
#include
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index bddce0b..1dc6edf 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -258,8 +258,10 @@ asmlinkage int sys_execve(nabi_no_regargs struct pt_regs regs)
error = PTR_ERR(filename);
if (IS_ERR(filename))
goto out;
- error = do_execve(filename, (char __user *__user *) (long)regs.regs[5],
- (char __user *__user *) (long)regs.regs[6], ®s);
+ error = do_execve(filename,
+ (const char __user *const __user *) (long)regs.regs[5],
+ (const char __user *const __user *) (long)regs.regs[6],
+ ®s);
putname(filename);
out:
@@ -436,7 +438,9 @@ asmlinkage void bad_stack(void)
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
register unsigned long __a0 asm("$4") = (unsigned long) filename;
register unsigned long __a1 asm("$5") = (unsigned long) argv;
diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c
index 762eb32..f48373e 100644
--- a/arch/mn10300/kernel/process.c
+++ b/arch/mn10300/kernel/process.c
@@ -269,8 +269,8 @@ asmlinkage long sys_vfork(void)
}
asmlinkage long sys_execve(const char __user *name,
- char __user * __user *argv,
- char __user * __user *envp)
+ const char __user *const __user *argv,
+ const char __user *const __user *envp)
{
char *filename;
int error;
diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c
index 1444875..0dc8543 100644
--- a/arch/parisc/hpux/fs.c
+++ b/arch/parisc/hpux/fs.c
@@ -41,8 +41,10 @@ int hpux_execve(struct pt_regs *regs)
if (IS_ERR(filename))
goto out;
- error = do_execve(filename, (char __user * __user *) regs->gr[25],
- (char __user * __user *) regs->gr[24], regs);
+ error = do_execve(filename,
+ (const char __user *const __user *) regs->gr[25],
+ (const char __user *const __user *) regs->gr[24],
+ regs);
putname(filename);
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 76332da..4b4b918 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -348,17 +348,22 @@ asmlinkage int sys_execve(struct pt_regs *regs)
error = PTR_ERR(filename);
if (IS_ERR(filename))
goto out;
- error = do_execve(filename, (char __user * __user *) regs->gr[25],
- (char __user * __user *) regs->gr[24], regs);
+ error = do_execve(filename,
+ (const char __user *const __user *) regs->gr[25],
+ (const char __user *const __user *) regs->gr[24],
+ regs);
putname(filename);
out:
return error;
}
-extern int __execve(const char *filename, char *const argv[],
- char *const envp[], struct task_struct *task);
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+extern int __execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[], struct task_struct *task);
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
return __execve(filename, argv, envp, current);
}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index feacfb7..91356ff 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1034,8 +1034,9 @@ int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2,
flush_fp_to_thread(current);
flush_altivec_to_thread(current);
flush_spe_to_thread(current);
- error = do_execve(filename, (char __user * __user *) a1,
- (char __user * __user *) a2, regs);
+ error = do_execve(filename,
+ (const char __user *const __user *) a1,
+ (const char __user *const __user *) a2, regs);
putname(filename);
out:
return error;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 7eafaf2..d3a2d1c 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -267,8 +267,9 @@ asmlinkage void execve_tail(void)
/*
* sys_execve() executes a new program.
*/
-SYSCALL_DEFINE3(execve, const char __user *, name, char __user * __user *, argv,
- char __user * __user *, envp)
+SYSCALL_DEFINE3(execve, const char __user *, name,
+ const char __user *const __user *, argv,
+ const char __user *const __user *, envp)
{
struct pt_regs *regs = task_pt_regs(current);
char *filename;
diff --git a/arch/score/kernel/sys_score.c b/arch/score/kernel/sys_score.c
index 651096f..e478bf9 100644
--- a/arch/score/kernel/sys_score.c
+++ b/arch/score/kernel/sys_score.c
@@ -99,8 +99,10 @@ score_execve(struct pt_regs *regs)
if (IS_ERR(filename))
return error;
- error = do_execve(filename, (char __user *__user*)regs->regs[5],
- (char __user *__user *) regs->regs[6], regs);
+ error = do_execve(filename,
+ (const char __user *const __user *)regs->regs[5],
+ (const char __user *const __user *)regs->regs[6],
+ regs);
putname(filename);
return error;
@@ -110,7 +112,9 @@ score_execve(struct pt_regs *regs)
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
register unsigned long __r4 asm("r4") = (unsigned long) filename;
register unsigned long __r5 asm("r5") = (unsigned long) argv;
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 0529819..762a139 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -296,9 +296,10 @@ asmlinkage int sys_vfork(unsigned long r4, unsigned long r5,
/*
* sys_execve() executes a new program.
*/
-asmlinkage int sys_execve(char __user *ufilename, char __user * __user *uargv,
- char __user * __user *uenvp, unsigned long r7,
- struct pt_regs __regs)
+asmlinkage int sys_execve(const char __user *ufilename,
+ const char __user *const __user *uargv,
+ const char __user *const __user *uenvp,
+ unsigned long r7, struct pt_regs __regs)
{
struct pt_regs *regs = RELOC_HIDE(&__regs, 0);
int error;
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 68d128d..210c1ca 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -497,8 +497,8 @@ asmlinkage int sys_execve(const char *ufilename, char **uargv,
goto out;
error = do_execve(filename,
- (char __user * __user *)uargv,
- (char __user * __user *)uenvp,
+ (const char __user *const __user *)uargv,
+ (const char __user *const __user *)uenvp,
pregs);
putname(filename);
out:
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index eb68bfd..f56b6fe 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -71,7 +71,9 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1,
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
register long __sc0 __asm__ ("r3") = __NR_execve;
register long __sc4 __asm__ ("r4") = (long) filename;
diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c
index 2872357..c5a38c4 100644
--- a/arch/sh/kernel/sys_sh64.c
+++ b/arch/sh/kernel/sys_sh64.c
@@ -33,7 +33,9 @@
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_execve);
register unsigned long __sc2 __asm__ ("r2") = (unsigned long) filename;
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 2050ca0..f0c7422 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -25,9 +25,9 @@ extern void atomic_sub(int, atomic_t *);
extern void atomic64_sub(int, atomic64_t *);
extern int atomic_add_ret(int, atomic_t *);
-extern int atomic64_add_ret(int, atomic64_t *);
+extern long atomic64_add_ret(int, atomic64_t *);
extern int atomic_sub_ret(int, atomic_t *);
-extern int atomic64_sub_ret(int, atomic64_t *);
+extern long atomic64_sub_ret(int, atomic64_t *);
#define atomic_dec_return(v) atomic_sub_ret(1, v)
#define atomic64_dec_return(v) atomic64_sub_ret(1, v)
@@ -91,7 +91,7 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n)))
#define atomic64_xchg(v, new) (xchg(&((v)->counter), new))
-static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
+static inline long atomic64_add_unless(atomic64_t *v, long a, long u)
{
long c, old;
c = atomic64_read(v);
diff --git a/arch/sparc/include/asm/fb.h b/arch/sparc/include/asm/fb.h
index e834880..2173432 100644
--- a/arch/sparc/include/asm/fb.h
+++ b/arch/sparc/include/asm/fb.h
@@ -1,5 +1,6 @@
#ifndef _SPARC_FB_H_
#define _SPARC_FB_H_
+#include
#include
#include
#include
@@ -18,6 +19,9 @@ static inline int fb_is_primary_device(struct fb_info *info)
struct device *dev = info->device;
struct device_node *node;
+ if (console_set_on_cmdline)
+ return 0;
+
node = dev->of_node;
if (node &&
node == of_console_device)
diff --git a/arch/sparc/include/asm/rwsem-const.h b/arch/sparc/include/asm/rwsem-const.h
index a303c9d..e4c61a1 100644
--- a/arch/sparc/include/asm/rwsem-const.h
+++ b/arch/sparc/include/asm/rwsem-const.h
@@ -5,7 +5,7 @@
#define RWSEM_UNLOCKED_VALUE 0x00000000
#define RWSEM_ACTIVE_BIAS 0x00000001
#define RWSEM_ACTIVE_MASK 0x0000ffff
-#define RWSEM_WAITING_BIAS 0xffff0000
+#define RWSEM_WAITING_BIAS (-0x00010000)
#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h
index d0b3b01..03eb5a8 100644
--- a/arch/sparc/include/asm/unistd.h
+++ b/arch/sparc/include/asm/unistd.h
@@ -397,8 +397,11 @@
#define __NR_rt_tgsigqueueinfo 326
#define __NR_perf_event_open 327
#define __NR_recvmmsg 328
+#define __NR_fanotify_init 329
+#define __NR_fanotify_mark 330
+#define __NR_prlimit64 331
-#define NR_syscalls 329
+#define NR_syscalls 332
#ifdef __32bit_syscall_numbers__
/* Sparc 32-bit only has the "setresuid32", "getresuid32" variants,
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 40e29fc..1752929 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -633,8 +633,10 @@ asmlinkage int sparc_execve(struct pt_regs *regs)
if(IS_ERR(filename))
goto out;
error = do_execve(filename,
- (char __user * __user *)regs->u_regs[base + UREG_I1],
- (char __user * __user *)regs->u_regs[base + UREG_I2],
+ (const char __user *const __user *)
+ regs->u_regs[base + UREG_I1],
+ (const char __user *const __user *)
+ regs->u_regs[base + UREG_I2],
regs);
putname(filename);
out:
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index dbe81a3..485f547 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -739,9 +739,9 @@ asmlinkage int sparc_execve(struct pt_regs *regs)
if (IS_ERR(filename))
goto out;
error = do_execve(filename,
- (char __user * __user *)
+ (const char __user *const __user *)
regs->u_regs[base + UREG_I1],
- (char __user * __user *)
+ (const char __user *const __user *)
regs->u_regs[base + UREG_I2], regs);
putname(filename);
if (!error) {
diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S
index 46a76ba..44e5faf 100644
--- a/arch/sparc/kernel/sys32.S
+++ b/arch/sparc/kernel/sys32.S
@@ -330,6 +330,15 @@ do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */
nop
nop
+ .globl sys32_fanotify_mark
+sys32_fanotify_mark:
+ sethi %hi(sys_fanotify_mark), %g1
+ sllx %o2, 32, %o2
+ or %o2, %o3, %o2
+ mov %o4, %o3
+ jmpl %g1 + %lo(sys_fanotify_mark), %g0
+ mov %o5, %o4
+
.section __ex_table,"a"
.align 4
.word 1b, __retl_efault, 2b, __retl_efault
diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c
index ee995b7..5079413 100644
--- a/arch/sparc/kernel/sys_sparc_32.c
+++ b/arch/sparc/kernel/sys_sparc_32.c
@@ -282,7 +282,9 @@ out:
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
long __res;
register long __g1 __asm__ ("g1") = __NR_execve;
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 3d435c4..f836f4e 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -758,7 +758,9 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act,
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
long __res;
register long __g1 __asm__ ("g1") = __NR_execve;
diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S
index 801fc8e..ec396e1 100644
--- a/arch/sparc/kernel/systbls_32.S
+++ b/arch/sparc/kernel/systbls_32.S
@@ -82,5 +82,6 @@ sys_call_table:
/*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
/*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
/*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
-/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg
+/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
+/*330*/ .long sys_fanotify_mark, sys_prlimit64
diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S
index 9db058d..8cfcaa5 100644
--- a/arch/sparc/kernel/systbls_64.S
+++ b/arch/sparc/kernel/systbls_64.S
@@ -83,7 +83,8 @@ sys_call_table32:
/*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate
.word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv
- .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg
+ .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init
+/*330*/ .word sys32_fanotify_mark, sys_prlimit64
#endif /* CONFIG_COMPAT */
@@ -158,4 +159,5 @@ sys_call_table:
/*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate
.word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1
/*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv
- .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg
+ .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init
+/*330*/ .word sys_fanotify_mark, sys_prlimit64
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index ed590ad..985cc28 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -543,8 +543,9 @@ long _sys_vfork(struct pt_regs *regs)
/*
* sys_execve() executes a new program.
*/
-long _sys_execve(char __user *path, char __user *__user *argv,
- char __user *__user *envp, struct pt_regs *regs)
+long _sys_execve(const char __user *path,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp, struct pt_regs *regs)
{
long error;
char *filename;
diff --git a/arch/um/include/asm/dma-mapping.h b/arch/um/include/asm/dma-mapping.h
index 17a2cb5..1f469e8 100644
--- a/arch/um/include/asm/dma-mapping.h
+++ b/arch/um/include/asm/dma-mapping.h
@@ -95,13 +95,6 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems,
#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h)
-static inline int
-dma_get_cache_alignment(void)
-{
- BUG();
- return(0);
-}
-
static inline void
dma_cache_sync(struct device *dev, void *vaddr, size_t size,
enum dma_data_direction direction)
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 59b20d9..cd145ed 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -44,8 +44,9 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
PT_REGS_SP(regs) = esp;
}
-static long execve1(const char *file, char __user * __user *argv,
- char __user *__user *env)
+static long execve1(const char *file,
+ const char __user *const __user *argv,
+ const char __user *const __user *env)
{
long error;
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index 7427c0b..5ddb246 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -51,7 +51,9 @@ long old_mmap(unsigned long addr, unsigned long len,
return err;
}
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
mm_segment_t fs;
int ret;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a84fc34..cea0cd9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -245,6 +245,11 @@ config ARCH_HWEIGHT_CFLAGS
config KTIME_SCALAR
def_bool X86_32
+
+config ARCH_CPU_PROBE_RELEASE
+ def_bool y
+ depends on HOTPLUG_CPU
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -749,11 +754,11 @@ config IOMMU_API
def_bool (AMD_IOMMU || DMAR)
config MAXSMP
- bool "Configure Maximum number of SMP Processors and NUMA Nodes"
+ bool "Enable Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
select CPUMASK_OFFSTACK
---help---
- Configure maximum number of CPUS and NUMA Nodes for this architecture.
+ Enable maximum number of CPUS and NUMA Nodes for this architecture.
If unsure, say N.
config NR_CPUS
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 2984a25..f686f49 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -26,6 +26,7 @@ struct mm_struct;
struct vm_area_struct;
extern pgd_t swapper_pg_dir[1024];
+extern pgd_t trampoline_pg_dir[1024];
static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index feb2ff9..f1d8b44 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -23,8 +23,9 @@ long sys_iopl(unsigned int, struct pt_regs *);
/* kernel/process.c */
int sys_fork(struct pt_regs *);
int sys_vfork(struct pt_regs *);
-long sys_execve(const char __user *, char __user * __user *,
- char __user * __user *, struct pt_regs *);
+long sys_execve(const char __user *,
+ const char __user *const __user *,
+ const char __user *const __user *, struct pt_regs *);
long sys_clone(unsigned long, unsigned long, void __user *,
void __user *, struct pt_regs *);
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index cb507bb..4dde797 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -13,14 +13,17 @@ extern unsigned char *trampoline_base;
extern unsigned long init_rsp;
extern unsigned long initial_code;
+extern unsigned long initial_page_table;
extern unsigned long initial_gs;
#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
extern unsigned long setup_trampoline(void);
+extern void __init setup_trampoline_page_table(void);
extern void __init reserve_trampoline_memory(void);
#else
-static inline void reserve_trampoline_memory(void) {};
+static inline void setup_trampoline_page_table(void) {}
+static inline void reserve_trampoline_memory(void) {}
#endif /* CONFIG_X86_TRAMPOLINE */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 4dc0084..f1efeba 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1728,6 +1728,8 @@ __apicdebuginit(void) print_IO_APIC(void)
struct irq_pin_list *entry;
cfg = desc->chip_data;
+ if (!cfg)
+ continue;
entry = cfg->irq_2_pin;
if (!entry)
continue;
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 60a57b1..ba5f62f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -669,7 +669,7 @@ bool cpu_has_amd_erratum(const int *erratum)
}
/* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 8) | cpu->x86_mask;
+ ms = (cpu->x86_model << 4) | cpu->x86_mask;
while ((range = *erratum++))
if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
(ms >= AMD_MODEL_RANGE_START(range)) &&
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 214ac86..d8d86d0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added)
* Intel Errata AAP53 (model 30)
* Intel Errata BD53 (model 44)
*
- * These chips need to be 'reset' when adding counters by programming
- * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
- * either in sequence on the same PMC or on different PMCs.
+ * The official story:
+ * These chips need to be 'reset' when adding counters by programming the
+ * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
+ * in sequence on the same PMC or on different PMCs.
+ *
+ * In practice it appears some of these events do in fact count, and
+ * we need to program all 4 events.
*/
-static void intel_pmu_nhm_enable_all(int added)
+static void intel_pmu_nhm_workaround(void)
{
- if (added) {
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- int i;
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ static const unsigned long nhm_magic[4] = {
+ 0x4300B5,
+ 0x4300D2,
+ 0x4300B1,
+ 0x4300B1
+ };
+ struct perf_event *event;
+ int i;
+
+ /*
+ * The errata require the following steps:
+ * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
+ * 2) Configure 4 PERFEVTSELx with the magic events and clear
+ * the corresponding PMCx;
+ * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
+ * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
+ * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
+ */
+
+ /*
+ * The real steps we choose are a little different from above.
+ * A) To reduce MSR operations, we don't run step 1) as they
+ * are already cleared before this function is called;
+ * B) Call x86_perf_event_update to save PMCx before configuring
+ * PERFEVTSELx with magic number;
+ * C) With step 5), we do clear only when the PERFEVTSELx is
+ * not used currently.
+ * D) Call x86_perf_event_set_period to restore PMCx;
+ */
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
- wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
+ /* We always operate 4 pairs of PERF Counters */
+ for (i = 0; i < 4; i++) {
+ event = cpuc->events[i];
+ if (event)
+ x86_perf_event_update(event);
+ }
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
- wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+ for (i = 0; i < 4; i++) {
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
+ wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
+ }
- for (i = 0; i < 3; i++) {
- struct perf_event *event = cpuc->events[i];
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
- if (!event)
- continue;
+ for (i = 0; i < 4; i++) {
+ event = cpuc->events[i];
+ if (event) {
+ x86_perf_event_set_period(event);
__x86_pmu_enable_event(&event->hw,
- ARCH_PERFMON_EVENTSEL_ENABLE);
- }
+ ARCH_PERFMON_EVENTSEL_ENABLE);
+ } else
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
}
+}
+
+static void intel_pmu_nhm_enable_all(int added)
+{
+ if (added)
+ intel_pmu_nhm_workaround();
intel_pmu_enable_all(added);
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index ff4c453..fa8c1b8 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -334,7 +334,7 @@ ENTRY(startup_32_smp)
/*
* Enable paging
*/
- movl $pa(swapper_pg_dir),%eax
+ movl pa(initial_page_table), %eax
movl %eax,%cr3 /* set the page table pointer.. */
movl %cr0,%eax
orl $X86_CR0_PG,%eax
@@ -614,6 +614,8 @@ ignore_int:
.align 4
ENTRY(initial_code)
.long i386_start_kernel
+ENTRY(initial_page_table)
+ .long pa(swapper_pg_dir)
/*
* BSS section
@@ -629,6 +631,10 @@ ENTRY(swapper_pg_dir)
#endif
swapper_pg_fixmap:
.fill 1024,4,0
+#ifdef CONFIG_X86_TRAMPOLINE
+ENTRY(trampoline_pg_dir)
+ .fill 1024,4,0
+#endif
ENTRY(empty_zero_page)
.fill 4096,1,0
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index ef10940..852b819 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -194,7 +194,7 @@ static struct hw_breakpoint {
unsigned long addr;
int len;
int type;
- struct perf_event **pev;
+ struct perf_event * __percpu *pev;
} breakinfo[HBP_NUM];
static unsigned long early_dr7;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1bfb6cf..770ebfb 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -709,6 +709,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
struct hlist_node *node, *tmp;
unsigned long flags, orig_ret_address = 0;
unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline;
+ kprobe_opcode_t *correct_ret_addr = NULL;
INIT_HLIST_HEAD(&empty_rp);
kretprobe_hash_lock(current, &head, &flags);
@@ -740,14 +741,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
/* another task is sharing our hash bucket */
continue;
+ orig_ret_address = (unsigned long)ri->ret_addr;
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+ correct_ret_addr = ri->ret_addr;
+ hlist_for_each_entry_safe(ri, node, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
if (ri->rp && ri->rp->handler) {
__get_cpu_var(current_kprobe) = &ri->rp->kp;
get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+ ri->ret_addr = correct_ret_addr;
ri->rp->handler(ri, regs);
__get_cpu_var(current_kprobe) = NULL;
}
- orig_ret_address = (unsigned long)ri->ret_addr;
recycle_rp_inst(ri, &empty_rp);
if (orig_ret_address != trampoline_address)
@@ -759,8 +780,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
break;
}
- kretprobe_assert(ri, orig_ret_address, trampoline_address);
-
kretprobe_hash_unlock(current, &flags);
hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 64ecaf0..57d1868 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -301,8 +301,9 @@ EXPORT_SYMBOL(kernel_thread);
/*
* sys_execve() executes a new program.
*/
-long sys_execve(const char __user *name, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs *regs)
+long sys_execve(const char __user *name,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp, struct pt_regs *regs)
{
long error;
char *filename;
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b008e78..c3a4fbb 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1014,6 +1014,8 @@ void __init setup_arch(char **cmdline_p)
paging_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir);
+ setup_trampoline_page_table();
+
tboot_probe();
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a5e928b..8b3bfc4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -73,7 +73,6 @@
#ifdef CONFIG_X86_32
u8 apicid_2_node[MAX_APICID];
-static int low_mappings;
#endif
/* State of each CPU */
@@ -91,6 +90,25 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 };
static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
+
+/*
+ * We need this for trampoline_base protection from concurrent accesses when
+ * off- and onlining cores wildly.
+ */
+static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
+
+/*
+ * Acquire x86_cpu_hotplug_driver_mutex, which guards trampoline_base
+ * against concurrent CPU off-/onlining.  Declared with an explicit
+ * (void) so the definition is a real prototype.
+ */
+void cpu_hotplug_driver_lock(void)
+{
+	mutex_lock(&x86_cpu_hotplug_driver_mutex);
+}
+
+/*
+ * Release x86_cpu_hotplug_driver_mutex taken by
+ * cpu_hotplug_driver_lock().  Declared with an explicit (void) so the
+ * definition is a real prototype.
+ */
+void cpu_hotplug_driver_unlock(void)
+{
+	mutex_unlock(&x86_cpu_hotplug_driver_mutex);
+}
+
+/*
+ * Stub probe/release hooks: both ignore their arguments and
+ * unconditionally return -1.
+ * NOTE(review): -1 is a bare literal rather than a named errno value --
+ * confirm this is the error callers are meant to see.
+ */
+ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; }
+ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; }
#else
static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
#define get_idle_for_cpu(x) (idle_thread_array[(x)])
@@ -281,6 +299,18 @@ notrace static void __cpuinit start_secondary(void *unused)
* fragile that we want to limit the things done here to the
* most necessary things.
*/
+
+#ifdef CONFIG_X86_32
+ /*
+ * Switch away from the trampoline page-table
+ *
+ * Do this before cpu_init() because it needs to access per-cpu
+ * data which may not be mapped in the trampoline page-table.
+ */
+ load_cr3(swapper_pg_dir);
+ __flush_tlb_all();
+#endif
+
vmi_bringup();
cpu_init();
preempt_disable();
@@ -299,12 +329,6 @@ notrace static void __cpuinit start_secondary(void *unused)
legacy_pic->chip->unmask(0);
}
-#ifdef CONFIG_X86_32
- while (low_mappings)
- cpu_relax();
- __flush_tlb_all();
-#endif
-
/* This must be done before setting cpu_online_mask */
set_cpu_sibling_map(raw_smp_processor_id());
wmb();
@@ -750,6 +774,7 @@ do_rest:
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
+ initial_page_table = __pa(&trampoline_pg_dir);
#else
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
@@ -897,20 +922,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
-#ifdef CONFIG_X86_32
- /* init low mem mapping */
- clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
- min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
- flush_tlb_all();
- low_mappings = 1;
-
err = do_boot_cpu(apicid, cpu);
- zap_low_mappings(false);
- low_mappings = 0;
-#else
- err = do_boot_cpu(apicid, cpu);
-#endif
if (err) {
pr_debug("do_boot_cpu failed %d\n", err);
return -EIO;
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
index 196552b..d5e0662 100644
--- a/arch/x86/kernel/sys_i386_32.c
+++ b/arch/x86/kernel/sys_i386_32.c
@@ -28,7 +28,9 @@
* Do a system call from kernel instead of calling sys_execve so we
* end up with proper pt_regs.
*/
-int kernel_execve(const char *filename, char *const argv[], char *const envp[])
+int kernel_execve(const char *filename,
+ const char *const argv[],
+ const char *const envp[])
{
long __res;
asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx"
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c
index c652ef6..a874495 100644
--- a/arch/x86/kernel/trampoline.c
+++ b/arch/x86/kernel/trampoline.c
@@ -1,6 +1,7 @@
#include
#include
+#include
#include
#if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP)
@@ -37,3 +38,20 @@ unsigned long __trampinit setup_trampoline(void)
memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
return virt_to_phys(trampoline_base);
}
+
+/*
+ * Populate trampoline_pg_dir, the page directory that smpboot.c points
+ * initial_page_table at for secondary CPUs on 32-bit.  Compiles to an
+ * empty function when CONFIG_X86_32 is not set.
+ */
+void __init setup_trampoline_page_table(void)
+{
+#ifdef CONFIG_X86_32
+	/*
+	 * Copy the whole kernel address range.  This must not be capped by
+	 * KERNEL_PGD_BOUNDARY: when the kernel owns more PGD slots than
+	 * user space (e.g. 1G/2G VM splits) a capped copy would leave part
+	 * of the kernel range unmapped in the trampoline page table.
+	 */
+	clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY,
+			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			KERNEL_PGD_PTRS);
+
+	/*
+	 * Initialize low mappings: mirror the kernel entries at the bottom
+	 * of the address space.  Cap by both sizes so the copy neither
+	 * reads past the end of swapper_pg_dir nor spills into the kernel
+	 * range filled in above.
+	 */
+	clone_pgd_range(trampoline_pg_dir,
+			swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			min_t(unsigned long, KERNEL_PGD_PTRS,
+			      KERNEL_PGD_BOUNDARY));
+#endif
+}
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 7c2f38f..e3558b9 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -318,8 +318,9 @@ long xtensa_clone(unsigned long clone_flags, unsigned long newsp,
*/
asmlinkage
-long xtensa_execve(const char __user *name, char __user * __user *argv,
- char __user * __user *envp,
+long xtensa_execve(const char __user *name,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp,
long a3, long a4, long a5,
struct pt_regs *regs)
{
diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c
index ea24c1e..2673a3d 100644
--- a/drivers/ata/sata_dwc_460ex.c
+++ b/drivers/ata/sata_dwc_460ex.c
@@ -1588,7 +1588,7 @@ static const struct ata_port_info sata_dwc_port_info[] = {
},
};
-static int sata_dwc_probe(struct of_device *ofdev,
+static int sata_dwc_probe(struct platform_device *ofdev,
const struct of_device_id *match)
{
struct sata_dwc_device *hsdev;
@@ -1702,7 +1702,7 @@ error_out:
return err;
}
-static int sata_dwc_remove(struct of_device *ofdev)
+static int sata_dwc_remove(struct platform_device *ofdev)
{
struct device *dev = &ofdev->dev;
struct ata_host *host = dev_get_drvdata(dev);
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index 2982b3e..057413b 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -94,6 +94,7 @@
#include
#include
#if defined(CONFIG_OF)
+#include
#include
#include
#endif
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index ad46eae..c350d01 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -675,8 +675,8 @@ static int ptmx_open(struct inode *inode, struct file *filp)
}
set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
- filp->private_data = tty;
- file_move(filp, &tty->tty_files);
+
+ tty_add_file(tty, filp);
retval = devpts_pty_new(inode, tty->link);
if (retval)
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 0350c42..949067a 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */
DEFINE_MUTEX(tty_mutex);
EXPORT_SYMBOL(tty_mutex);
+/* Spinlock to protect the tty->tty_files list */
+DEFINE_SPINLOCK(tty_files_lock);
+
static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *);
static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *);
ssize_t redirected_tty_write(struct file *, const char __user *,
@@ -185,6 +188,41 @@ void free_tty_struct(struct tty_struct *tty)
kfree(tty);
}
+/*
+ * Return the tty recorded in the tty_file_private that hangs off
+ * file->private_data.  private_data is dereferenced unconditionally.
+ */
+static inline struct tty_struct *file_tty(struct file *file)
+{
+	struct tty_file_private *fpriv = file->private_data;
+
+	return fpriv->tty;
+}
+
+/*
+ * Attach @file to @tty: allocate a tty_file_private record, store it in
+ * file->private_data and link it into tty->tty_files under
+ * tty_files_lock.
+ */
+void tty_add_file(struct tty_struct *tty, struct file *file)
+{
+	struct tty_file_private *fpriv;
+
+	/* XXX: must implement proper error handling in callers */
+	fpriv = kmalloc(sizeof(*fpriv), GFP_KERNEL|__GFP_NOFAIL);
+
+	fpriv->file = file;
+	fpriv->tty = tty;
+	file->private_data = fpriv;
+
+	spin_lock(&tty_files_lock);
+	list_add(&fpriv->list, &tty->tty_files);
+	spin_unlock(&tty_files_lock);
+}
+
+/*
+ * Detach @file from its tty: unlink its tty_file_private record from
+ * the tty's file list under tty_files_lock, then clear
+ * file->private_data and free the record.
+ */
+void tty_del_file(struct file *file)
+{
+	struct tty_file_private *fpriv = file->private_data;
+
+	spin_lock(&tty_files_lock);
+	list_del(&fpriv->list);
+	spin_unlock(&tty_files_lock);
+
+	file->private_data = NULL;
+	kfree(fpriv);
+}
+
+
#define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base)
/**
@@ -235,11 +273,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine)
struct list_head *p;
int count = 0;
- file_list_lock();
+ spin_lock(&tty_files_lock);
list_for_each(p, &tty->tty_files) {
count++;
}
- file_list_unlock();
+ spin_unlock(&tty_files_lock);
if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
tty->driver->subtype == PTY_TYPE_SLAVE &&
tty->link && tty->link->count)
@@ -497,6 +535,7 @@ void __tty_hangup(struct tty_struct *tty)
struct file *cons_filp = NULL;
struct file *filp, *f = NULL;
struct task_struct *p;
+ struct tty_file_private *priv;
int closecount = 0, n;
unsigned long flags;
int refs = 0;
@@ -506,7 +545,7 @@ void __tty_hangup(struct tty_struct *tty)
spin_lock(&redirect_lock);
- if (redirect && redirect->private_data == tty) {
+ if (redirect && file_tty(redirect) == tty) {
f = redirect;
redirect = NULL;
}
@@ -519,9 +558,10 @@ void __tty_hangup(struct tty_struct *tty)
workqueue with the lock held */
check_tty_count(tty, "tty_hangup");
- file_list_lock();
+ spin_lock(&tty_files_lock);
/* This breaks for file handles being sent over AF_UNIX sockets ? */
- list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
+ list_for_each_entry(priv, &tty->tty_files, list) {
+ filp = priv->file;
if (filp->f_op->write == redirected_tty_write)
cons_filp = filp;
if (filp->f_op->write != tty_write)
@@ -530,7 +570,7 @@ void __tty_hangup(struct tty_struct *tty)
__tty_fasync(-1, filp, 0); /* can't block */
filp->f_op = &hung_up_tty_fops;
}
- file_list_unlock();
+ spin_unlock(&tty_files_lock);
tty_ldisc_hangup(tty);
@@ -889,12 +929,10 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
int i;
- struct tty_struct *tty;
- struct inode *inode;
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct tty_struct *tty = file_tty(file);
struct tty_ldisc *ld;
- tty = file->private_data;
- inode = file->f_path.dentry->d_inode;
if (tty_paranoia_check(tty, inode, "tty_read"))
return -EIO;
if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags)))
@@ -1065,12 +1103,11 @@ void tty_write_message(struct tty_struct *tty, char *msg)
static ssize_t tty_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- struct tty_struct *tty;
struct inode *inode = file->f_path.dentry->d_inode;
+ struct tty_struct *tty = file_tty(file);
+ struct tty_ldisc *ld;
ssize_t ret;
- struct tty_ldisc *ld;
- tty = file->private_data;
if (tty_paranoia_check(tty, inode, "tty_write"))
return -EIO;
if (!tty || !tty->ops->write ||
@@ -1424,9 +1461,9 @@ static void release_one_tty(struct work_struct *work)
tty_driver_kref_put(driver);
module_put(driver->owner);
- file_list_lock();
+ spin_lock(&tty_files_lock);
list_del_init(&tty->tty_files);
- file_list_unlock();
+ spin_unlock(&tty_files_lock);
put_pid(tty->pgrp);
put_pid(tty->session);
@@ -1507,13 +1544,13 @@ static void release_tty(struct tty_struct *tty, int idx)
int tty_release(struct inode *inode, struct file *filp)
{
- struct tty_struct *tty, *o_tty;
+ struct tty_struct *tty = file_tty(filp);
+ struct tty_struct *o_tty;
int pty_master, tty_closing, o_tty_closing, do_sleep;
int devpts;
int idx;
char buf[64];
- tty = filp->private_data;
if (tty_paranoia_check(tty, inode, "tty_release_dev"))
return 0;
@@ -1671,8 +1708,7 @@ int tty_release(struct inode *inode, struct file *filp)
* - do_tty_hangup no longer sees this file descriptor as
* something that needs to be handled for hangups.
*/
- file_kill(filp);
- filp->private_data = NULL;
+ tty_del_file(filp);
/*
* Perform some housekeeping before deciding whether to return.
@@ -1839,8 +1875,8 @@ got_driver:
return PTR_ERR(tty);
}
- filp->private_data = tty;
- file_move(filp, &tty->tty_files);
+ tty_add_file(tty, filp);
+
check_tty_count(tty, "tty_open");
if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
tty->driver->subtype == PTY_TYPE_MASTER)
@@ -1916,11 +1952,10 @@ got_driver:
static unsigned int tty_poll(struct file *filp, poll_table *wait)
{
- struct tty_struct *tty;
+ struct tty_struct *tty = file_tty(filp);
struct tty_ldisc *ld;
int ret = 0;
- tty = filp->private_data;
if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll"))
return 0;
@@ -1933,11 +1968,10 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait)
static int __tty_fasync(int fd, struct file *filp, int on)
{
- struct tty_struct *tty;
+ struct tty_struct *tty = file_tty(filp);
unsigned long flags;
int retval = 0;
- tty = filp->private_data;
if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync"))
goto out;
@@ -2491,13 +2525,13 @@ EXPORT_SYMBOL(tty_pair_get_pty);
*/
long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- struct tty_struct *tty, *real_tty;
+ struct tty_struct *tty = file_tty(file);
+ struct tty_struct *real_tty;
void __user *p = (void __user *)arg;
int retval;
struct tty_ldisc *ld;
struct inode *inode = file->f_dentry->d_inode;
- tty = file->private_data;
if (tty_paranoia_check(tty, inode, "tty_ioctl"))
return -EINVAL;
@@ -2619,7 +2653,7 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct inode *inode = file->f_dentry->d_inode;
- struct tty_struct *tty = file->private_data;
+ struct tty_struct *tty = file_tty(file);
struct tty_ldisc *ld;
int retval = -ENOIOCTLCMD;
@@ -2711,7 +2745,7 @@ void __do_SAK(struct tty_struct *tty)
if (!filp)
continue;
if (filp->f_op->read == tty_read &&
- filp->private_data == tty) {
+ file_tty(filp) == tty) {
printk(KERN_NOTICE "SAK: killed process %d"
" (%s): fd#%d opened to the tty\n",
task_pid_nr(p), p->comm, i);
diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index c734f9b..50590c7 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -194,10 +194,11 @@ static DECLARE_WORK(console_work, console_callback);
int fg_console;
int last_console;
int want_console = -1;
-int saved_fg_console;
-int saved_last_console;
-int saved_want_console;
-int saved_vc_mode;
+static int saved_fg_console;
+static int saved_last_console;
+static int saved_want_console;
+static int saved_vc_mode;
+static int saved_console_blanked;
/*
* For each existing display, we have a pointer to console currently visible
@@ -3449,6 +3450,7 @@ int con_debug_enter(struct vc_data *vc)
saved_last_console = last_console;
saved_want_console = want_console;
saved_vc_mode = vc->vc_mode;
+ saved_console_blanked = console_blanked;
vc->vc_mode = KD_TEXT;
console_blanked = 0;
if (vc->vc_sw->con_debug_enter)
@@ -3492,6 +3494,7 @@ int con_debug_leave(void)
fg_console = saved_fg_console;
last_console = saved_last_console;
want_console = saved_want_console;
+ console_blanked = saved_console_blanked;
vc_cons[fg_console].d->vc_mode = saved_vc_mode;
vc = vc_cons[fg_console].d;
diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
index 0ed763c..b663d57 100644
--- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c
+++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c
@@ -94,6 +94,7 @@
#ifdef CONFIG_OF
/* For open firmware. */
+#include
#include
#include
#endif
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index e635199..0c52899 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1299,6 +1299,7 @@ static const struct hid_device_id hid_blacklist[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE) },
{ HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0006) },
{ HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH1) },
{ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) },
{ HID_USB_DEVICE(USB_VENDOR_ID_EZKEY, USB_DEVICE_ID_BTC_8193) },
{ HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) },
diff --git a/drivers/hid/hid-egalax.c b/drivers/hid/hid-egalax.c
index f44bdc0..8ca7f65 100644
--- a/drivers/hid/hid-egalax.c
+++ b/drivers/hid/hid-egalax.c
@@ -159,6 +159,13 @@ static int egalax_event(struct hid_device *hid, struct hid_field *field,
{
struct egalax_data *td = hid_get_drvdata(hid);
+ /* Note, eGalax has two product lines: the first is resistive and
+ * uses a standard parallel multitouch protocol (product ID ==
+ * 48xx). The second is capacitive and uses an unusual "serial"
+ * protocol with a different message for each multitouch finger
+ * (product ID == 72xx). We do not yet generate a correct event
+ * sequence for the capacitive/serial protocol.
+ */
if (hid->claimed & HID_CLAIMED_INPUT) {
struct input_dev *input = field->hidinput->input;
@@ -246,6 +253,8 @@ static void egalax_remove(struct hid_device *hdev)
static const struct hid_device_id egalax_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_DWAV,
USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH) },
+ { HID_USB_DEVICE(USB_VENDOR_ID_DWAV,
+ USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH1) },
{ }
};
MODULE_DEVICE_TABLE(hid, egalax_devices);
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index d3fc13a..85c6d13 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -188,6 +188,7 @@
#define USB_VENDOR_ID_DWAV 0x0eef
#define USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER 0x0001
#define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH 0x480d
+#define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH1 0x720c
#define USB_VENDOR_ID_ELECOM 0x056e
#define USB_DEVICE_ID_ELECOM_BM084 0x0061
diff --git a/drivers/hid/hid-picolcd.c b/drivers/hid/hid-picolcd.c
index 346f0e3..bc2e077 100644
--- a/drivers/hid/hid-picolcd.c
+++ b/drivers/hid/hid-picolcd.c
@@ -547,11 +547,11 @@ static void picolcd_fb_destroy(struct fb_info *info)
ref_cnt--;
mutex_lock(&info->lock);
(*ref_cnt)--;
- may_release = !ref_cnt;
+ may_release = !*ref_cnt;
mutex_unlock(&info->lock);
if (may_release) {
- framebuffer_release(info);
vfree((u8 *)info->fix.smem_start);
+ framebuffer_release(info);
}
}
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 254a003..0a29c51 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -266,13 +266,15 @@ static int hiddev_open(struct inode *inode, struct file *file)
{
struct hiddev_list *list;
struct usb_interface *intf;
+ struct hid_device *hid;
struct hiddev *hiddev;
int res;
intf = usb_find_interface(&hiddev_driver, iminor(inode));
if (!intf)
return -ENODEV;
- hiddev = usb_get_intfdata(intf);
+ hid = usb_get_intfdata(intf);
+ hiddev = hid->hiddev;
if (!(list = kzalloc(sizeof(struct hiddev_list), GFP_KERNEL)))
return -ENOMEM;
@@ -587,7 +589,7 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct hiddev_list *list = file->private_data;
struct hiddev *hiddev = list->hiddev;
struct hid_device *hid = hiddev->hid;
- struct usb_device *dev = hid_to_usb_dev(hid);
+ struct usb_device *dev;
struct hiddev_collection_info cinfo;
struct hiddev_report_info rinfo;
struct hiddev_field_info finfo;
@@ -601,9 +603,11 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
/* Called without BKL by compat methods so no BKL taken */
/* FIXME: Who or what stop this racing with a disconnect ?? */
- if (!hiddev->exist)
+ if (!hiddev->exist || !hid)
return -EIO;
+ dev = hid_to_usb_dev(hid);
+
switch (cmd) {
case HIDIOCGVERSION:
@@ -888,7 +892,6 @@ int hiddev_connect(struct hid_device *hid, unsigned int force)
hid->hiddev = hiddev;
hiddev->hid = hid;
hiddev->exist = 1;
- usb_set_intfdata(usbhid->intf, usbhid);
retval = usb_register_dev(usbhid->intf, &hiddev_class);
if (retval) {
err_hid("Not able to get a minor for this device.");
diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig
index 0fba829..4d4d09b 100644
--- a/drivers/hwmon/Kconfig
+++ b/drivers/hwmon/Kconfig
@@ -332,11 +332,11 @@ config SENSORS_F71805F
will be called f71805f.
config SENSORS_F71882FG
- tristate "Fintek F71808E, F71858FG, F71862FG, F71882FG, F71889FG and F8000"
+ tristate "Fintek F71858FG, F71862FG, F71882FG, F71889FG and F8000"
depends on EXPERIMENTAL
help
- If you say yes here you get support for hardware monitoring features
- of the Fintek F71808E, F71858FG, F71862FG/71863FG, F71882FG/F71883FG,
+ If you say yes here you get support for hardware monitoring
+ features of the Fintek F71858FG, F71862FG/71863FG, F71882FG/F71883FG,
F71889FG and F8000 Super-I/O chips.
This driver can also be built as a module. If so, the module
diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c
index 6207120..537841e 100644
--- a/drivers/hwmon/f71882fg.c
+++ b/drivers/hwmon/f71882fg.c
@@ -45,7 +45,6 @@
#define SIO_REG_ADDR 0x60 /* Logical device address (2 bytes) */
#define SIO_FINTEK_ID 0x1934 /* Manufacturers ID */
-#define SIO_F71808_ID 0x0901 /* Chipset ID */
#define SIO_F71858_ID 0x0507 /* Chipset ID */
#define SIO_F71862_ID 0x0601 /* Chipset ID */
#define SIO_F71882_ID 0x0541 /* Chipset ID */
@@ -97,10 +96,9 @@ static unsigned short force_id;
module_param(force_id, ushort, 0);
MODULE_PARM_DESC(force_id, "Override the detected device ID");
-enum chips { f71808fg, f71858fg, f71862fg, f71882fg, f71889fg, f8000 };
+enum chips { f71858fg, f71862fg, f71882fg, f71889fg, f8000 };
static const char *f71882fg_names[] = {
- "f71808fg",
"f71858fg",
"f71862fg",
"f71882fg",
@@ -308,8 +306,8 @@ static struct sensor_device_attribute_2 f71858fg_in_temp_attr[] = {
SENSOR_ATTR_2(temp3_fault, S_IRUGO, show_temp_fault, NULL, 0, 2),
};
-/* In attr common to the f71862fg, f71882fg and f71889fg */
-static struct sensor_device_attribute_2 fxxxx_in_attr[] = {
+/* Temp and in attr common to the f71862fg, f71882fg and f71889fg */
+static struct sensor_device_attribute_2 fxxxx_in_temp_attr[] = {
SENSOR_ATTR_2(in0_input, S_IRUGO, show_in, NULL, 0, 0),
SENSOR_ATTR_2(in1_input, S_IRUGO, show_in, NULL, 0, 1),
SENSOR_ATTR_2(in2_input, S_IRUGO, show_in, NULL, 0, 2),
@@ -319,22 +317,6 @@ static struct sensor_device_attribute_2 fxxxx_in_attr[] = {
SENSOR_ATTR_2(in6_input, S_IRUGO, show_in, NULL, 0, 6),
SENSOR_ATTR_2(in7_input, S_IRUGO, show_in, NULL, 0, 7),
SENSOR_ATTR_2(in8_input, S_IRUGO, show_in, NULL, 0, 8),
-};
-
-/* In attr for the f71808fg */
-static struct sensor_device_attribute_2 f71808_in_attr[] = {
- SENSOR_ATTR_2(in0_input, S_IRUGO, show_in, NULL, 0, 0),
- SENSOR_ATTR_2(in1_input, S_IRUGO, show_in, NULL, 0, 1),
- SENSOR_ATTR_2(in2_input, S_IRUGO, show_in, NULL, 0, 2),
- SENSOR_ATTR_2(in3_input, S_IRUGO, show_in, NULL, 0, 3),
- SENSOR_ATTR_2(in4_input, S_IRUGO, show_in, NULL, 0, 4),
- SENSOR_ATTR_2(in5_input, S_IRUGO, show_in, NULL, 0, 5),
- SENSOR_ATTR_2(in6_input, S_IRUGO, show_in, NULL, 0, 7),
- SENSOR_ATTR_2(in7_input, S_IRUGO, show_in, NULL, 0, 8),
-};
-
-/* Temp attr common to the f71808fg, f71862fg, f71882fg and f71889fg */
-static struct sensor_device_attribute_2 fxxxx_temp_attr[] = {
SENSOR_ATTR_2(temp1_input, S_IRUGO, show_temp, NULL, 0, 1),
SENSOR_ATTR_2(temp1_max, S_IRUGO|S_IWUSR, show_temp_max,
store_temp_max, 0, 1),
@@ -373,10 +355,6 @@ static struct sensor_device_attribute_2 fxxxx_temp_attr[] = {
store_temp_beep, 0, 6),
SENSOR_ATTR_2(temp2_type, S_IRUGO, show_temp_type, NULL, 0, 2),
SENSOR_ATTR_2(temp2_fault, S_IRUGO, show_temp_fault, NULL, 0, 2),
-};
-
-/* Temp and in attr common to the f71862fg, f71882fg and f71889fg */
-static struct sensor_device_attribute_2 f71862_temp_attr[] = {
SENSOR_ATTR_2(temp3_input, S_IRUGO, show_temp, NULL, 0, 3),
SENSOR_ATTR_2(temp3_max, S_IRUGO|S_IWUSR, show_temp_max,
store_temp_max, 0, 3),
@@ -1011,11 +989,6 @@ static struct f71882fg_data *f71882fg_update_device(struct device *dev)
data->temp_type[1] = 6;
break;
}
- } else if (data->type == f71808fg) {
- reg = f71882fg_read8(data, F71882FG_REG_TEMP_TYPE);
- data->temp_type[1] = (reg & 0x02) ? 2 : 4;
- data->temp_type[2] = (reg & 0x04) ? 2 : 4;
-
} else {
reg2 = f71882fg_read8(data, F71882FG_REG_PECI);
if ((reg2 & 0x03) == 0x01)
@@ -1898,8 +1871,7 @@ static ssize_t store_pwm_auto_point_temp(struct device *dev,
val /= 1000;
- if (data->type == f71889fg
- || data->type == f71808fg)
+ if (data->type == f71889fg)
val = SENSORS_LIMIT(val, -128, 127);
else
val = SENSORS_LIMIT(val, 0, 127);
@@ -2002,28 +1974,8 @@ static int __devinit f71882fg_probe(struct platform_device *pdev)
/* fall through! */
case f71862fg:
err = f71882fg_create_sysfs_files(pdev,
- f71862_temp_attr,
- ARRAY_SIZE(f71862_temp_attr));
- if (err)
- goto exit_unregister_sysfs;
- err = f71882fg_create_sysfs_files(pdev,
- fxxxx_in_attr,
- ARRAY_SIZE(fxxxx_in_attr));
- if (err)
- goto exit_unregister_sysfs;
- err = f71882fg_create_sysfs_files(pdev,
- fxxxx_temp_attr,
- ARRAY_SIZE(fxxxx_temp_attr));
- break;
- case f71808fg:
- err = f71882fg_create_sysfs_files(pdev,
- f71808_in_attr,
- ARRAY_SIZE(f71808_in_attr));
- if (err)
- goto exit_unregister_sysfs;
- err = f71882fg_create_sysfs_files(pdev,
- fxxxx_temp_attr,
- ARRAY_SIZE(fxxxx_temp_attr));
+ fxxxx_in_temp_attr,
+ ARRAY_SIZE(fxxxx_in_temp_attr));
break;
case f8000:
err = f71882fg_create_sysfs_files(pdev,
@@ -2050,7 +2002,6 @@ static int __devinit f71882fg_probe(struct platform_device *pdev)
case f71862fg:
err = (data->pwm_enable & 0x15) != 0x15;
break;
- case f71808fg:
case f71882fg:
case f71889fg:
err = 0;
@@ -2096,7 +2047,6 @@ static int __devinit f71882fg_probe(struct platform_device *pdev)
f8000_auto_pwm_attr,
ARRAY_SIZE(f8000_auto_pwm_attr));
break;
- case f71808fg:
case f71889fg:
for (i = 0; i < nr_fans; i++) {
data->pwm_auto_point_mapping[i] =
@@ -2176,22 +2126,8 @@ static int f71882fg_remove(struct platform_device *pdev)
/* fall through! */
case f71862fg:
f71882fg_remove_sysfs_files(pdev,
- f71862_temp_attr,
- ARRAY_SIZE(f71862_temp_attr));
- f71882fg_remove_sysfs_files(pdev,
- fxxxx_in_attr,
- ARRAY_SIZE(fxxxx_in_attr));
- f71882fg_remove_sysfs_files(pdev,
- fxxxx_temp_attr,
- ARRAY_SIZE(fxxxx_temp_attr));
- break;
- case f71808fg:
- f71882fg_remove_sysfs_files(pdev,
- f71808_in_attr,
- ARRAY_SIZE(f71808_in_attr));
- f71882fg_remove_sysfs_files(pdev,
- fxxxx_temp_attr,
- ARRAY_SIZE(fxxxx_temp_attr));
+ fxxxx_in_temp_attr,
+ ARRAY_SIZE(fxxxx_in_temp_attr));
break;
case f8000:
f71882fg_remove_sysfs_files(pdev,
@@ -2259,9 +2195,6 @@ static int __init f71882fg_find(int sioaddr, unsigned short *address,
devid = force_id ? force_id : superio_inw(sioaddr, SIO_REG_DEVID);
switch (devid) {
- case SIO_F71808_ID:
- sio_data->type = f71808fg;
- break;
case SIO_F71858_ID:
sio_data->type = f71858fg;
break;
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index c908c5f..5808731 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -28,7 +28,7 @@ struct evdev {
int minor;
struct input_handle handle;
wait_queue_head_t wait;
- struct evdev_client *grab;
+ struct evdev_client __rcu *grab;
struct list_head client_list;
spinlock_t client_lock; /* protects client_list */
struct mutex mutex;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 11567c7..c148b63 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2136,16 +2136,6 @@ static void sync_sbs(mddev_t * mddev, int nospares)
* with the rest of the array)
*/
mdk_rdev_t *rdev;
-
- /* First make sure individual recovery_offsets are correct */
- list_for_each_entry(rdev, &mddev->disks, same_set) {
- if (rdev->raid_disk >= 0 &&
- mddev->delta_disks >= 0 &&
- !test_bit(In_sync, &rdev->flags) &&
- mddev->curr_resync_completed > rdev->recovery_offset)
- rdev->recovery_offset = mddev->curr_resync_completed;
-
- }
list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->sb_events == mddev->events ||
(nospares &&
@@ -2167,12 +2157,27 @@ static void md_update_sb(mddev_t * mddev, int force_change)
int sync_req;
int nospares = 0;
- mddev->utime = get_seconds();
- if (mddev->external)
- return;
repeat:
+ /* First make sure individual recovery_offsets are correct */
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
+ if (rdev->raid_disk >= 0 &&
+ mddev->delta_disks >= 0 &&
+ !test_bit(In_sync, &rdev->flags) &&
+ mddev->curr_resync_completed > rdev->recovery_offset)
+ rdev->recovery_offset = mddev->curr_resync_completed;
+
+ }
+ if (mddev->external || !mddev->persistent) {
+ clear_bit(MD_CHANGE_DEVS, &mddev->flags);
+ clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
+ wake_up(&mddev->sb_wait);
+ return;
+ }
+
spin_lock_irq(&mddev->write_lock);
+ mddev->utime = get_seconds();
+
set_bit(MD_CHANGE_PENDING, &mddev->flags);
if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
force_change = 1;
@@ -2221,19 +2226,6 @@ repeat:
MD_BUG();
mddev->events --;
}
-
- /*
- * do not write anything to disk if using
- * nonpersistent superblocks
- */
- if (!mddev->persistent) {
- if (!mddev->external)
- clear_bit(MD_CHANGE_PENDING, &mddev->flags);
-
- spin_unlock_irq(&mddev->write_lock);
- wake_up(&mddev->sb_wait);
- return;
- }
sync_sbs(mddev, nospares);
spin_unlock_irq(&mddev->write_lock);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 73cc74f..ad83a4d 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -787,8 +787,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
struct bio_list bl;
struct page **behind_pages = NULL;
const int rw = bio_data_dir(bio);
- const bool do_sync = (bio->bi_rw & REQ_SYNC);
- bool do_barriers;
+ const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
+ unsigned long do_barriers;
mdk_rdev_t *blocked_rdev;
/*
@@ -1120,6 +1120,8 @@ static int raid1_spare_active(mddev_t *mddev)
{
int i;
conf_t *conf = mddev->private;
+ int count = 0;
+ unsigned long flags;
/*
* Find all failed disks within the RAID1 configuration
@@ -1131,15 +1133,16 @@ static int raid1_spare_active(mddev_t *mddev)
if (rdev
&& !test_bit(Faulty, &rdev->flags)
&& !test_and_set_bit(In_sync, &rdev->flags)) {
- unsigned long flags;
- spin_lock_irqsave(&conf->device_lock, flags);
- mddev->degraded--;
- spin_unlock_irqrestore(&conf->device_lock, flags);
+ count++;
+ sysfs_notify_dirent(rdev->sysfs_state);
}
}
+ spin_lock_irqsave(&conf->device_lock, flags);
+ mddev->degraded -= count;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
print_conf(conf);
- return 0;
+ return count;
}
@@ -1640,7 +1643,7 @@ static void raid1d(mddev_t *mddev)
* We already have a nr_pending reference on these rdevs.
*/
int i;
- const bool do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC);
+ const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC);
clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
clear_bit(R1BIO_Barrier, &r1_bio->state);
for (i=0; i < conf->raid_disks; i++)
@@ -1696,7 +1699,7 @@ static void raid1d(mddev_t *mddev)
(unsigned long long)r1_bio->sector);
raid_end_bio_io(r1_bio);
} else {
- const bool do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC;
+ const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC;
r1_bio->bios[r1_bio->read_disk] =
mddev->ro ? IO_BLOCKED : NULL;
r1_bio->read_disk = disk;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a88aeb5..8471838 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -799,7 +799,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
int i;
int chunk_sects = conf->chunk_mask + 1;
const int rw = bio_data_dir(bio);
- const bool do_sync = (bio->bi_rw & REQ_SYNC);
+ const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
struct bio_list bl;
unsigned long flags;
mdk_rdev_t *blocked_rdev;
@@ -1116,6 +1116,8 @@ static int raid10_spare_active(mddev_t *mddev)
int i;
conf_t *conf = mddev->private;
mirror_info_t *tmp;
+ int count = 0;
+ unsigned long flags;
/*
* Find all non-in_sync disks within the RAID10 configuration
@@ -1126,15 +1128,16 @@ static int raid10_spare_active(mddev_t *mddev)
if (tmp->rdev
&& !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
- unsigned long flags;
- spin_lock_irqsave(&conf->device_lock, flags);
- mddev->degraded--;
- spin_unlock_irqrestore(&conf->device_lock, flags);
+ count++;
+ sysfs_notify_dirent(tmp->rdev->sysfs_state);
}
}
+ spin_lock_irqsave(&conf->device_lock, flags);
+ mddev->degraded -= count;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
print_conf(conf);
- return 0;
+ return count;
}
@@ -1734,7 +1737,7 @@ static void raid10d(mddev_t *mddev)
raid_end_bio_io(r10_bio);
bio_put(bio);
} else {
- const bool do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
+ const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
bio_put(bio);
rdev = conf->mirrors[mirror].rdev;
if (printk_ratelimit())
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 866d4b5..69b0a16 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5330,6 +5330,8 @@ static int raid5_spare_active(mddev_t *mddev)
int i;
raid5_conf_t *conf = mddev->private;
struct disk_info *tmp;
+ int count = 0;
+ unsigned long flags;
for (i = 0; i < conf->raid_disks; i++) {
tmp = conf->disks + i;
@@ -5337,14 +5339,15 @@ static int raid5_spare_active(mddev_t *mddev)
&& tmp->rdev->recovery_offset == MaxSector
&& !test_bit(Faulty, &tmp->rdev->flags)
&& !test_and_set_bit(In_sync, &tmp->rdev->flags)) {
- unsigned long flags;
- spin_lock_irqsave(&conf->device_lock, flags);
- mddev->degraded--;
- spin_unlock_irqrestore(&conf->device_lock, flags);
+ count++;
+ sysfs_notify_dirent(tmp->rdev->sysfs_state);
}
}
+ spin_lock_irqsave(&conf->device_lock, flags);
+ mddev->degraded -= count;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
print_raid5_conf(conf);
- return 0;
+ return count;
}
static int raid5_remove_disk(mddev_t *mddev, int number)
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index 0efe631..d80cfdc 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -86,7 +86,9 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev)
init_waitqueue_head(&host->wq);
INIT_DELAYED_WORK(&host->detect, mmc_rescan);
INIT_DELAYED_WORK_DEFERRABLE(&host->disable, mmc_host_deeper_disable);
+#ifdef CONFIG_PM
host->pm_notify.notifier_call = mmc_pm_notify;
+#endif
/*
* By default, hosts do not support SGIO or large requests.
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 283190b..68d1279 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -132,7 +132,7 @@ config MMC_SDHCI_CNS3XXX
config MMC_SDHCI_S3C
tristate "SDHCI support on Samsung S3C SoC"
- depends on MMC_SDHCI && (PLAT_S3C24XX || PLAT_S3C64XX)
+ depends on MMC_SDHCI && PLAT_SAMSUNG
help
This selects the Secure Digital Host Controller Interface (SDHCI)
often referrered to as the HSMMC block in some of the Samsung S3C
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index 0a7f261..71ad416 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -242,7 +242,7 @@ static void sdhci_s3c_notify_change(struct platform_device *dev, int state)
{
struct sdhci_host *host = platform_get_drvdata(dev);
if (host) {
- mutex_lock(&host->lock);
+ spin_lock(&host->lock);
if (state) {
dev_dbg(&dev->dev, "card inserted.\n");
host->flags &= ~SDHCI_DEVICE_DEAD;
@@ -252,8 +252,8 @@ static void sdhci_s3c_notify_change(struct platform_device *dev, int state)
host->flags |= SDHCI_DEVICE_DEAD;
host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
}
- sdhci_card_detect(host);
- mutex_unlock(&host->lock);
+ tasklet_schedule(&host->card_tasklet);
+ spin_unlock(&host->lock);
}
}
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 7855121..401527d 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1180,7 +1180,8 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
else
ctrl &= ~SDHCI_CTRL_4BITBUS;
- if (ios->timing == MMC_TIMING_SD_HS)
+ if (ios->timing == MMC_TIMING_SD_HS &&
+ !(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT))
ctrl |= SDHCI_CTRL_HISPD;
else
ctrl &= ~SDHCI_CTRL_HISPD;
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 036cfae..d316bc7 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -245,6 +245,8 @@ struct sdhci_host {
#define SDHCI_QUIRK_MISSING_CAPS (1<<27)
/* Controller uses Auto CMD12 command to stop the transfer */
#define SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 (1<<28)
+/* Controller doesn't have HISPD bit field in HI-SPEED SD card */
+#define SDHCI_QUIRK_NO_HISPD_BIT (1<<29)
int irq; /* Device IRQ */
void __iomem * ioaddr; /* Mapped address */
diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c
index 00af55d..fe63f6b 100644
--- a/drivers/mtd/maps/physmap_of.c
+++ b/drivers/mtd/maps/physmap_of.c
@@ -22,6 +22,7 @@
#include
#include
#include
+#include
#include
#include
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index a3c7473..d551ddd 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2866,6 +2866,7 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
*/
if (id_data[0] == id_data[6] && id_data[1] == id_data[7] &&
id_data[0] == NAND_MFR_SAMSUNG &&
+ (chip->cellinfo & NAND_CI_CELLTYPE_MSK) &&
id_data[5] != 0x00) {
/* Calc pagesize */
mtd->writesize = 2048 << (extid & 0x03);
@@ -2934,14 +2935,10 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
chip->chip_shift = ffs((unsigned)(chip->chipsize >> 32)) + 32 - 1;
/* Set the bad block position */
- if (!(busw & NAND_BUSWIDTH_16) && (*maf_id == NAND_MFR_STMICRO ||
- (*maf_id == NAND_MFR_SAMSUNG &&
- mtd->writesize == 512) ||
- *maf_id == NAND_MFR_AMD))
- chip->badblockpos = NAND_SMALL_BADBLOCK_POS;
- else
+ if (mtd->writesize > 512 || (busw & NAND_BUSWIDTH_16))
chip->badblockpos = NAND_LARGE_BADBLOCK_POS;
-
+ else
+ chip->badblockpos = NAND_SMALL_BADBLOCK_POS;
/* Get chip options, preserve non chip based options */
chip->options &= ~NAND_CHIPOPTIONS_MSK;
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index e02fa4f..4d89f37 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -363,7 +363,7 @@ static struct pxa3xx_nand_flash *builtin_flash_types[] = {
#define tAR_NDTR1(r) (((r) >> 0) & 0xf)
/* convert nano-seconds to nand flash controller clock cycles */
-#define ns2cycle(ns, clk) (int)(((ns) * (clk / 1000000) / 1000) - 1)
+#define ns2cycle(ns, clk) (int)((ns) * (clk / 1000000) / 1000)
/* convert nand flash controller clock cycles to nano-seconds */
#define cycle2ns(c, clk) ((((c) + 1) * 1000000 + clk / 500) / (clk / 1000))
diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c
index 0d5de25..373dcfe 100644
--- a/drivers/net/wireless/ath/ath5k/base.c
+++ b/drivers/net/wireless/ath/ath5k/base.c
@@ -48,6 +48,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -476,6 +477,26 @@ ath5k_pci_probe(struct pci_dev *pdev,
int ret;
u8 csz;
+ /*
+ * L0s needs to be disabled on all ath5k cards.
+ *
+ * For distributions shipping with CONFIG_PCIEASPM (this will be enabled
+ * by default in the future in 2.6.36) this will also mean both L1 and
+ * L0s will be disabled when a pre 1.1 PCIe device is detected. We do
+ * know L1 works correctly even for all ath5k pre 1.1 PCIe devices
+ * though but cannot currently undo the effect of a blacklist, for
+ * details you can read pcie_aspm_sanity_check() and see how it adjusts
+ * the device link capability.
+ *
+ * It may be possible in the future to implement some PCI API to allow
+ * drivers to override blacklists for pre 1.1 PCIe but for now it is
+ * best to accept that both L0s and L1 will be disabled completely for
+ * distributions shipping with CONFIG_PCIEASPM rather than having this
+ * issue present. Motivation for adding this new API will be to help
+ * with power consumption for some of these devices.
+ */
+ pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S);
+
ret = pci_enable_device(pdev);
if (ret) {
dev_err(&pdev->dev, "can't enable device\n");
diff --git a/drivers/net/wireless/ath/ath9k/eeprom.h b/drivers/net/wireless/ath/ath9k/eeprom.h
index 8750c55..7f48df1 100644
--- a/drivers/net/wireless/ath/ath9k/eeprom.h
+++ b/drivers/net/wireless/ath/ath9k/eeprom.h
@@ -191,6 +191,7 @@
#define AR9287_EEP_NO_BACK_VER AR9287_EEP_MINOR_VER_1
#define AR9287_EEP_START_LOC 128
+#define AR9287_HTC_EEP_START_LOC 256
#define AR9287_NUM_2G_CAL_PIERS 3
#define AR9287_NUM_2G_CCK_TARGET_POWERS 3
#define AR9287_NUM_2G_20_TARGET_POWERS 3
diff --git a/drivers/net/wireless/ath/ath9k/eeprom_9287.c b/drivers/net/wireless/ath/ath9k/eeprom_9287.c
index 4a52cf0..dff2da7 100644
--- a/drivers/net/wireless/ath/ath9k/eeprom_9287.c
+++ b/drivers/net/wireless/ath/ath9k/eeprom_9287.c
@@ -34,9 +34,14 @@ static bool ath9k_hw_ar9287_fill_eeprom(struct ath_hw *ah)
struct ar9287_eeprom *eep = &ah->eeprom.map9287;
struct ath_common *common = ath9k_hw_common(ah);
u16 *eep_data;
- int addr, eep_start_loc = AR9287_EEP_START_LOC;
+ int addr, eep_start_loc;
eep_data = (u16 *)eep;
+ if (ah->hw_version.devid == 0x7015)
+ eep_start_loc = AR9287_HTC_EEP_START_LOC;
+ else
+ eep_start_loc = AR9287_EEP_START_LOC;
+
if (!ath9k_hw_use_flash(ah)) {
ath_print(common, ATH_DBG_EEPROM,
"Reading from EEPROM, not flash\n");
diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
index 61c1bee..17e7a9a 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
@@ -799,7 +799,7 @@ static int ath9k_hif_usb_download_fw(struct hif_device_usb *hif_dev)
}
kfree(buf);
- if (hif_dev->device_id == 0x7010)
+ if ((hif_dev->device_id == 0x7010) || (hif_dev->device_id == 0x7015))
firm_offset = AR7010_FIRMWARE_TEXT;
else
firm_offset = AR9271_FIRMWARE_TEXT;
@@ -901,6 +901,7 @@ static int ath9k_hif_usb_probe(struct usb_interface *interface,
switch(hif_dev->device_id) {
case 0x7010:
+ case 0x7015:
case 0x9018:
if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x0202)
hif_dev->fw_name = FIRMWARE_AR7010_1_1;
@@ -912,11 +913,6 @@ static int ath9k_hif_usb_probe(struct usb_interface *interface,
break;
}
- if (!hif_dev->fw_name) {
- dev_err(&udev->dev, "Can't determine firmware !\n");
- goto err_htc_hw_alloc;
- }
-
ret = ath9k_hif_usb_dev_init(hif_dev);
if (ret) {
ret = -EINVAL;
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
index 148b433..2d42791 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
@@ -245,6 +245,7 @@ static int ath9k_init_htc_services(struct ath9k_htc_priv *priv, u16 devid)
switch(devid) {
case 0x7010:
+ case 0x7015:
case 0x9018:
priv->htc->credits = 45;
break;
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
index ebed9d1..7d09b4b 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
@@ -366,7 +366,8 @@ static void ath9k_htc_setup_rate(struct ath9k_htc_priv *priv,
caps = WLAN_RC_HT_FLAG;
if (sta->ht_cap.mcs.rx_mask[1])
caps |= WLAN_RC_DS_FLAG;
- if (sta->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)
+ if ((sta->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) &&
+ (conf_is_ht40(&priv->hw->conf)))
caps |= WLAN_RC_40_FLAG;
if (conf_is_ht40(&priv->hw->conf) &&
(sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40))
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
index bd0b4ac..2a6e45a 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
@@ -78,18 +78,23 @@ int ath9k_htc_tx_start(struct ath9k_htc_priv *priv, struct sk_buff *skb)
struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
struct ieee80211_sta *sta = tx_info->control.sta;
struct ath9k_htc_sta *ista;
- struct ath9k_htc_vif *avp;
struct ath9k_htc_tx_ctl tx_ctl;
enum htc_endpoint_id epid;
u16 qnum;
__le16 fc;
u8 *tx_fhdr;
- u8 sta_idx;
+ u8 sta_idx, vif_idx;
hdr = (struct ieee80211_hdr *) skb->data;
fc = hdr->frame_control;
- avp = (struct ath9k_htc_vif *) tx_info->control.vif->drv_priv;
+ if (tx_info->control.vif &&
+ (struct ath9k_htc_vif *) tx_info->control.vif->drv_priv)
+ vif_idx = ((struct ath9k_htc_vif *)
+ tx_info->control.vif->drv_priv)->index;
+ else
+ vif_idx = priv->nvifs;
+
if (sta) {
ista = (struct ath9k_htc_sta *) sta->drv_priv;
sta_idx = ista->index;
@@ -106,7 +111,7 @@ int ath9k_htc_tx_start(struct ath9k_htc_priv *priv, struct sk_buff *skb)
memset(&tx_hdr, 0, sizeof(struct tx_frame_hdr));
tx_hdr.node_idx = sta_idx;
- tx_hdr.vif_idx = avp->index;
+ tx_hdr.vif_idx = vif_idx;
if (tx_info->flags & IEEE80211_TX_CTL_AMPDU) {
tx_ctl.type = ATH9K_HTC_AMPDU;
@@ -169,7 +174,7 @@ int ath9k_htc_tx_start(struct ath9k_htc_priv *priv, struct sk_buff *skb)
tx_ctl.type = ATH9K_HTC_NORMAL;
mgmt_hdr.node_idx = sta_idx;
- mgmt_hdr.vif_idx = avp->index;
+ mgmt_hdr.vif_idx = vif_idx;
mgmt_hdr.tidno = 0;
mgmt_hdr.flags = 0;
diff --git a/drivers/net/wireless/ath/ath9k/reg.h b/drivers/net/wireless/ath/ath9k/reg.h
index 633e3d9..d01c4ad 100644
--- a/drivers/net/wireless/ath/ath9k/reg.h
+++ b/drivers/net/wireless/ath/ath9k/reg.h
@@ -899,6 +899,7 @@
#define AR_DEVID_7010(_ah) \
(((_ah)->hw_version.devid == 0x7010) || \
+ ((_ah)->hw_version.devid == 0x7015) || \
((_ah)->hw_version.devid == 0x9018))
#define AR_RADIO_SREV_MAJOR 0xf0
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index 16bbfa3..1189dbb 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -6665,12 +6665,13 @@ static int __init ipw2100_init(void)
printk(KERN_INFO DRV_NAME ": %s, %s\n", DRV_DESCRIPTION, DRV_VERSION);
printk(KERN_INFO DRV_NAME ": %s\n", DRV_COPYRIGHT);
+ pm_qos_add_request(&ipw2100_pm_qos_req, PM_QOS_CPU_DMA_LATENCY,
+ PM_QOS_DEFAULT_VALUE);
+
ret = pci_register_driver(&ipw2100_pci_driver);
if (ret)
goto out;
- pm_qos_add_request(&ipw2100_pm_qos_req, PM_QOS_CPU_DMA_LATENCY,
- PM_QOS_DEFAULT_VALUE);
#ifdef CONFIG_IPW2100_DEBUG
ipw2100_debug_level = debug;
ret = driver_create_file(&ipw2100_pci_driver.driver,
diff --git a/drivers/net/wireless/wl12xx/wl1251_cmd.c b/drivers/net/wireless/wl12xx/wl1251_cmd.c
index a37b30c..ce3722f 100644
--- a/drivers/net/wireless/wl12xx/wl1251_cmd.c
+++ b/drivers/net/wireless/wl12xx/wl1251_cmd.c
@@ -484,7 +484,7 @@ int wl1251_cmd_trigger_scan_to(struct wl1251 *wl, u32 timeout)
cmd->timeout = timeout;
- ret = wl1251_cmd_send(wl, CMD_SCAN, cmd, sizeof(*cmd));
+ ret = wl1251_cmd_send(wl, CMD_TRIGGER_SCAN_TO, cmd, sizeof(*cmd));
if (ret < 0) {
wl1251_error("cmd trigger scan to failed: %d", ret);
goto out;
diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c
index e058c2b..ca05aef 100644
--- a/drivers/platform/x86/asus_acpi.c
+++ b/drivers/platform/x86/asus_acpi.c
@@ -938,10 +938,11 @@ static int set_brightness(int value)
/* SPLV laptop */
if (hotk->methods->brightness_set) {
if (!write_acpi_int(hotk->handle, hotk->methods->brightness_set,
- value, NULL))
+ value, NULL)) {
printk(KERN_WARNING
"Asus ACPI: Error changing brightness\n");
ret = -EIO;
+ }
goto out;
}
@@ -953,10 +954,11 @@ static int set_brightness(int value)
hotk->methods->brightness_down,
NULL, NULL);
(value > 0) ? value-- : value++;
- if (ACPI_FAILURE(status))
+ if (ACPI_FAILURE(status)) {
printk(KERN_WARNING
"Asus ACPI: Error changing brightness\n");
ret = -EIO;
+ }
}
out:
return ret;
diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c
index d071ce0..097083c 100644
--- a/drivers/platform/x86/compal-laptop.c
+++ b/drivers/platform/x86/compal-laptop.c
@@ -841,6 +841,14 @@ static struct dmi_system_id __initdata compal_dmi_table[] = {
.callback = dmi_check_cb
},
{
+ .ident = "Dell Mini 1012",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"),
+ },
+ .callback = dmi_check_cb
+ },
+ {
.ident = "Dell Inspiron 11z",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
@@ -1092,5 +1100,6 @@ MODULE_ALIAS("dmi:*:rnJHL90:rvrREFERENCE:*");
MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron910:*");
MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1010:*");
MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1011:*");
+MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1012:*");
MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1110:*");
MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1210:*");
diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index b41ed5c..4413975 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c
@@ -122,6 +122,13 @@ static struct dmi_system_id __devinitdata dell_blacklist[] = {
},
},
{
+ .ident = "Dell Mini 1012",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"),
+ },
+ },
+ {
.ident = "Dell Inspiron 11z",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index afe82e5..9024480 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c
@@ -1342,8 +1342,10 @@ static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips)
limits = &ips_lv_limits;
else if (strstr(boot_cpu_data.x86_model_id, "CPU U"))
limits = &ips_ulv_limits;
- else
+ else {
dev_info(&ips->dev->dev, "No CPUID match found.\n");
+ goto out;
+ }
rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power);
tdp = turbo_power & TURBO_TDP_MASK;
@@ -1432,6 +1434,12 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
spin_lock_init(&ips->turbo_status_lock);
+ ret = pci_enable_device(dev);
+ if (ret) {
+ dev_err(&dev->dev, "can't enable PCI device, aborting\n");
+ goto error_free;
+ }
+
if (!pci_resource_start(dev, 0)) {
dev_err(&dev->dev, "TBAR not assigned, aborting\n");
ret = -ENXIO;
@@ -1444,11 +1452,6 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
goto error_free;
}
- ret = pci_enable_device(dev);
- if (ret) {
- dev_err(&dev->dev, "can't enable PCI device, aborting\n");
- goto error_free;
- }
ips->regmap = ioremap(pci_resource_start(dev, 0),
pci_resource_len(dev, 0));
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 5d6119b..e35ed12 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -1911,6 +1911,17 @@ enum { /* hot key scan codes (derived from ACPI DSDT) */
TP_ACPI_HOTKEYSCAN_VOLUMEDOWN,
TP_ACPI_HOTKEYSCAN_MUTE,
TP_ACPI_HOTKEYSCAN_THINKPAD,
+ TP_ACPI_HOTKEYSCAN_UNK1,
+ TP_ACPI_HOTKEYSCAN_UNK2,
+ TP_ACPI_HOTKEYSCAN_UNK3,
+ TP_ACPI_HOTKEYSCAN_UNK4,
+ TP_ACPI_HOTKEYSCAN_UNK5,
+ TP_ACPI_HOTKEYSCAN_UNK6,
+ TP_ACPI_HOTKEYSCAN_UNK7,
+ TP_ACPI_HOTKEYSCAN_UNK8,
+
+ /* Hotkey keymap size */
+ TPACPI_HOTKEY_MAP_LEN
};
enum { /* Keys/events available through NVRAM polling */
@@ -3082,6 +3093,8 @@ static const struct tpacpi_quirk tpacpi_hotkey_qtable[] __initconst = {
TPACPI_Q_IBM('1', 'D', TPACPI_HK_Q_INIMASK), /* X22, X23, X24 */
};
+typedef u16 tpacpi_keymap_t[TPACPI_HOTKEY_MAP_LEN];
+
static int __init hotkey_init(struct ibm_init_struct *iibm)
{
/* Requirements for changing the default keymaps:
@@ -3113,9 +3126,17 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
* If the above is too much to ask, don't change the keymap.
* Ask the thinkpad-acpi maintainer to do it, instead.
*/
- static u16 ibm_keycode_map[] __initdata = {
+
+ enum keymap_index {
+ TPACPI_KEYMAP_IBM_GENERIC = 0,
+ TPACPI_KEYMAP_LENOVO_GENERIC,
+ };
+
+ static const tpacpi_keymap_t tpacpi_keymaps[] __initconst = {
+ /* Generic keymap for IBM ThinkPads */
+ [TPACPI_KEYMAP_IBM_GENERIC] = {
/* Scan Codes 0x00 to 0x0B: ACPI HKEY FN+F1..F12 */
- KEY_FN_F1, KEY_FN_F2, KEY_COFFEE, KEY_SLEEP,
+ KEY_FN_F1, KEY_BATTERY, KEY_COFFEE, KEY_SLEEP,
KEY_WLAN, KEY_FN_F6, KEY_SWITCHVIDEOMODE, KEY_FN_F8,
KEY_FN_F9, KEY_FN_F10, KEY_FN_F11, KEY_SUSPEND,
@@ -3146,11 +3167,13 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
/* (assignments unknown, please report if found) */
KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
- };
- static u16 lenovo_keycode_map[] __initdata = {
+ },
+
+ /* Generic keymap for Lenovo ThinkPads */
+ [TPACPI_KEYMAP_LENOVO_GENERIC] = {
/* Scan Codes 0x00 to 0x0B: ACPI HKEY FN+F1..F12 */
KEY_FN_F1, KEY_COFFEE, KEY_BATTERY, KEY_SLEEP,
- KEY_WLAN, KEY_FN_F6, KEY_SWITCHVIDEOMODE, KEY_FN_F8,
+ KEY_WLAN, KEY_CAMERA, KEY_SWITCHVIDEOMODE, KEY_FN_F8,
KEY_FN_F9, KEY_FN_F10, KEY_FN_F11, KEY_SUSPEND,
/* Scan codes 0x0C to 0x1F: Other ACPI HKEY hot keys */
@@ -3189,11 +3212,25 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
/* (assignments unknown, please report if found) */
KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN,
+ },
+ };
+
+ static const struct tpacpi_quirk tpacpi_keymap_qtable[] __initconst = {
+ /* Generic maps (fallback) */
+ {
+ .vendor = PCI_VENDOR_ID_IBM,
+ .bios = TPACPI_MATCH_ANY, .ec = TPACPI_MATCH_ANY,
+ .quirks = TPACPI_KEYMAP_IBM_GENERIC,
+ },
+ {
+ .vendor = PCI_VENDOR_ID_LENOVO,
+ .bios = TPACPI_MATCH_ANY, .ec = TPACPI_MATCH_ANY,
+ .quirks = TPACPI_KEYMAP_LENOVO_GENERIC,
+ },
};
-#define TPACPI_HOTKEY_MAP_LEN ARRAY_SIZE(ibm_keycode_map)
-#define TPACPI_HOTKEY_MAP_SIZE sizeof(ibm_keycode_map)
-#define TPACPI_HOTKEY_MAP_TYPESIZE sizeof(ibm_keycode_map[0])
+#define TPACPI_HOTKEY_MAP_SIZE sizeof(tpacpi_keymap_t)
+#define TPACPI_HOTKEY_MAP_TYPESIZE sizeof(tpacpi_keymap_t[0])
int res, i;
int status;
@@ -3202,6 +3239,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
bool tabletsw_state = false;
unsigned long quirks;
+ unsigned long keymap_id;
vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
"initializing hotkey subdriver\n");
@@ -3342,7 +3380,6 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
goto err_exit;
/* Set up key map */
-
hotkey_keycode_map = kmalloc(TPACPI_HOTKEY_MAP_SIZE,
GFP_KERNEL);
if (!hotkey_keycode_map) {
@@ -3352,17 +3389,14 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
goto err_exit;
}
- if (tpacpi_is_lenovo()) {
- dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
- "using Lenovo default hot key map\n");
- memcpy(hotkey_keycode_map, &lenovo_keycode_map,
- TPACPI_HOTKEY_MAP_SIZE);
- } else {
- dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
- "using IBM default hot key map\n");
- memcpy(hotkey_keycode_map, &ibm_keycode_map,
- TPACPI_HOTKEY_MAP_SIZE);
- }
+ keymap_id = tpacpi_check_quirks(tpacpi_keymap_qtable,
+ ARRAY_SIZE(tpacpi_keymap_qtable));
+ BUG_ON(keymap_id >= ARRAY_SIZE(tpacpi_keymaps));
+ dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
+ "using keymap number %lu\n", keymap_id);
+
+ memcpy(hotkey_keycode_map, &tpacpi_keymaps[keymap_id],
+ TPACPI_HOTKEY_MAP_SIZE);
input_set_capability(tpacpi_inputdev, EV_MSC, MSC_SCAN);
tpacpi_inputdev->keycodesize = TPACPI_HOTKEY_MAP_TYPESIZE;
@@ -3469,7 +3503,8 @@ static bool hotkey_notify_hotkey(const u32 hkey,
*send_acpi_ev = true;
*ignore_acpi_ev = false;
- if (scancode > 0 && scancode < 0x21) {
+ /* HKEY event 0x1001 is scancode 0x00 */
+ if (scancode > 0 && scancode <= TPACPI_HOTKEY_MAP_LEN) {
scancode--;
if (!(hotkey_source_mask & (1 << scancode))) {
tpacpi_input_send_key_masked(scancode);
@@ -6080,13 +6115,18 @@ static struct backlight_ops ibm_backlight_data = {
/* --------------------------------------------------------------------- */
+/*
+ * Call _BCL method of video device. On some ThinkPads this will
+ * switch the firmware to the ACPI brightness control mode.
+ */
+
static int __init tpacpi_query_bcl_levels(acpi_handle handle)
{
struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
union acpi_object *obj;
int rc;
- if (ACPI_SUCCESS(acpi_evaluate_object(handle, NULL, NULL, &buffer))) {
+ if (ACPI_SUCCESS(acpi_evaluate_object(handle, "_BCL", NULL, &buffer))) {
obj = (union acpi_object *)buffer.pointer;
if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) {
printk(TPACPI_ERR "Unknown _BCL data, "
@@ -6103,55 +6143,22 @@ static int __init tpacpi_query_bcl_levels(acpi_handle handle)
return rc;
}
-static acpi_status __init tpacpi_acpi_walk_find_bcl(acpi_handle handle,
- u32 lvl, void *context, void **rv)
-{
- char name[ACPI_PATH_SEGMENT_LENGTH];
- struct acpi_buffer buffer = { sizeof(name), &name };
-
- if (ACPI_SUCCESS(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)) &&
- !strncmp("_BCL", name, sizeof(name) - 1)) {
- BUG_ON(!rv || !*rv);
- **(int **)rv = tpacpi_query_bcl_levels(handle);
- return AE_CTRL_TERMINATE;
- } else {
- return AE_OK;
- }
-}
/*
* Returns 0 (no ACPI _BCL or _BCL invalid), or size of brightness map
*/
static unsigned int __init tpacpi_check_std_acpi_brightness_support(void)
{
- int status;
+ acpi_handle video_device;
int bcl_levels = 0;
- void *bcl_ptr = &bcl_levels;
-
- if (!vid_handle)
- TPACPI_ACPIHANDLE_INIT(vid);
-
- if (!vid_handle)
- return 0;
-
- /*
- * Search for a _BCL method, and execute it. This is safe on all
- * ThinkPads, and as a side-effect, _BCL will place a Lenovo Vista
- * BIOS in ACPI backlight control mode. We do NOT have to care
- * about calling the _BCL method in an enabled video device, any
- * will do for our purposes.
- */
- status = acpi_walk_namespace(ACPI_TYPE_METHOD, vid_handle, 3,
- tpacpi_acpi_walk_find_bcl, NULL, NULL,
- &bcl_ptr);
+ tpacpi_acpi_handle_locate("video", ACPI_VIDEO_HID, &video_device);
+ if (video_device)
+ bcl_levels = tpacpi_query_bcl_levels(video_device);
- if (ACPI_SUCCESS(status) && bcl_levels > 2) {
- tp_features.bright_acpimode = 1;
- return bcl_levels - 2;
- }
+ tp_features.bright_acpimode = (bcl_levels > 0);
- return 0;
+ return (bcl_levels > 2) ? (bcl_levels - 2) : 0;
}
/*
@@ -6244,28 +6251,6 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
if (tp_features.bright_unkfw)
return 1;
- if (tp_features.bright_acpimode) {
- if (acpi_video_backlight_support()) {
- if (brightness_enable > 1) {
- printk(TPACPI_NOTICE
- "Standard ACPI backlight interface "
- "available, not loading native one.\n");
- return 1;
- } else if (brightness_enable == 1) {
- printk(TPACPI_NOTICE
- "Backlight control force enabled, even if standard "
- "ACPI backlight interface is available\n");
- }
- } else {
- if (brightness_enable > 1) {
- printk(TPACPI_NOTICE
- "Standard ACPI backlight interface not "
- "available, thinkpad_acpi native "
- "brightness control enabled\n");
- }
- }
- }
-
if (!brightness_enable) {
dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_BRGHT,
"brightness support disabled by "
@@ -6273,6 +6258,26 @@ static int __init brightness_init(struct ibm_init_struct *iibm)
return 1;
}
+ if (acpi_video_backlight_support()) {
+ if (brightness_enable > 1) {
+ printk(TPACPI_INFO
+ "Standard ACPI backlight interface "
+ "available, not loading native one.\n");
+ return 1;
+ } else if (brightness_enable == 1) {
+ printk(TPACPI_WARN
+ "Cannot enable backlight brightness support, "
+ "ACPI is already handling it. Refer to the "
+ "acpi_backlight kernel parameter\n");
+ return 1;
+ }
+ } else if (tp_features.bright_acpimode && brightness_enable > 1) {
+ printk(TPACPI_NOTICE
+ "Standard ACPI backlight interface not "
+ "available, thinkpad_acpi native "
+ "brightness control enabled\n");
+ }
+
/*
* Check for module parameter bogosity, note that we
* init brightness_mode to TPACPI_BRGHT_MODE_MAX in order to be
diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c
index 95a895d..c8dc392 100644
--- a/drivers/scsi/arcmsr/arcmsr_hba.c
+++ b/drivers/scsi/arcmsr/arcmsr_hba.c
@@ -56,6 +56,7 @@
#include
#include
#include
+#include
#include
#include
#include
diff --git a/drivers/scsi/qla4xxx/ql4_glbl.h b/drivers/scsi/qla4xxx/ql4_glbl.h
index f065204..95a26fb 100644
--- a/drivers/scsi/qla4xxx/ql4_glbl.h
+++ b/drivers/scsi/qla4xxx/ql4_glbl.h
@@ -132,7 +132,7 @@ void qla4_8xxx_idc_unlock(struct scsi_qla_host *ha);
int qla4_8xxx_device_state_handler(struct scsi_qla_host *ha);
void qla4_8xxx_need_qsnt_handler(struct scsi_qla_host *ha);
void qla4_8xxx_clear_drv_active(struct scsi_qla_host *ha);
-inline void qla4_8xxx_set_drv_active(struct scsi_qla_host *ha);
+void qla4_8xxx_set_drv_active(struct scsi_qla_host *ha);
extern int ql4xextended_error_logging;
extern int ql4xdiscoverywait;
diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c
index e031a73..5d4a382 100644
--- a/drivers/scsi/qla4xxx/ql4_nx.c
+++ b/drivers/scsi/qla4xxx/ql4_nx.c
@@ -1418,7 +1418,7 @@ static int qla4_8xxx_rcvpeg_ready(struct scsi_qla_host *ha)
return QLA_SUCCESS;
}
-inline void
+void
qla4_8xxx_set_drv_active(struct scsi_qla_host *ha)
{
uint32_t drv_active;
diff --git a/drivers/serial/of_serial.c b/drivers/serial/of_serial.c
index 659a695..2af8fd1 100644
--- a/drivers/serial/of_serial.c
+++ b/drivers/serial/of_serial.c
@@ -14,11 +14,10 @@
#include
#include
#include
+#include
#include
#include
-#include
-
struct of_serial_info {
int type;
int line;
diff --git a/drivers/serial/suncore.c b/drivers/serial/suncore.c
index 544f2e2..6381a02 100644
--- a/drivers/serial/suncore.c
+++ b/drivers/serial/suncore.c
@@ -55,7 +55,12 @@ EXPORT_SYMBOL(sunserial_unregister_minors);
int sunserial_console_match(struct console *con, struct device_node *dp,
struct uart_driver *drv, int line, bool ignore_line)
{
- if (!con || of_console_device != dp)
+ if (!con)
+ return 0;
+
+ drv->cons = con;
+
+ if (of_console_device != dp)
return 0;
if (!ignore_line) {
@@ -69,12 +74,10 @@ int sunserial_console_match(struct console *con, struct device_node *dp,
return 0;
}
- con->index = line;
- drv->cons = con;
-
- if (!console_set_on_cmdline)
+ if (!console_set_on_cmdline) {
+ con->index = line;
add_preferred_console(con->name, line, NULL);
-
+ }
return 1;
}
EXPORT_SYMBOL(sunserial_console_match);
diff --git a/drivers/spi/coldfire_qspi.c b/drivers/spi/coldfire_qspi.c
index 59be3ef..052b3c7 100644
--- a/drivers/spi/coldfire_qspi.c
+++ b/drivers/spi/coldfire_qspi.c
@@ -24,6 +24,7 @@
#include
#include
#include
+#include
#include
#include
#include
diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c
index cdc4dd5..8ec83d2 100644
--- a/drivers/staging/pohmelfs/path_entry.c
+++ b/drivers/staging/pohmelfs/path_entry.c
@@ -44,9 +44,9 @@ int pohmelfs_construct_path_string(struct pohmelfs_inode *pi, void *data, int le
return -ENOENT;
}
- read_lock(&current->fs->lock);
+ spin_lock(&current->fs->lock);
path.mnt = mntget(current->fs->root.mnt);
- read_unlock(&current->fs->lock);
+ spin_unlock(&current->fs->lock);
path.dentry = d;
@@ -91,9 +91,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi)
return -ENOENT;
}
- read_lock(¤t->fs->lock);
+ spin_lock(¤t->fs->lock);
root = dget(current->fs->root.dentry);
- read_unlock(¤t->fs->lock);
+ spin_unlock(¤t->fs->lock);
spin_lock(&dcache_lock);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 29e850a..1318ee0 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -127,7 +127,10 @@ static void handle_tx(struct vhost_net *net)
size_t len, total_len = 0;
int err, wmem;
size_t hdr_size;
- struct socket *sock = rcu_dereference(vq->private_data);
+ struct socket *sock;
+
+ sock = rcu_dereference_check(vq->private_data,
+ lockdep_is_held(&vq->mutex));
if (!sock)
return;
@@ -582,7 +585,10 @@ static void vhost_net_disable_vq(struct vhost_net *n,
static void vhost_net_enable_vq(struct vhost_net *n,
struct vhost_virtqueue *vq)
{
- struct socket *sock = vq->private_data;
+ struct socket *sock;
+
+ sock = rcu_dereference_protected(vq->private_data,
+ lockdep_is_held(&vq->mutex));
if (!sock)
return;
if (vq == n->vqs + VHOST_NET_VQ_TX) {
@@ -598,7 +604,8 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
struct socket *sock;
mutex_lock(&vq->mutex);
- sock = vq->private_data;
+ sock = rcu_dereference_protected(vq->private_data,
+ lockdep_is_held(&vq->mutex));
vhost_net_disable_vq(n, vq);
rcu_assign_pointer(vq->private_data, NULL);
mutex_unlock(&vq->mutex);
@@ -736,7 +743,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
}
/* start polling new socket */
- oldsock = vq->private_data;
+ oldsock = rcu_dereference_protected(vq->private_data,
+ lockdep_is_held(&vq->mutex));
if (sock != oldsock) {
vhost_net_disable_vq(n, vq);
rcu_assign_pointer(vq->private_data, sock);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index e05557d..b5c4947 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -284,7 +284,7 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
vhost_dev_cleanup(dev);
memory->nregions = 0;
- dev->memory = memory;
+ RCU_INIT_POINTER(dev->memory, memory);
return 0;
}
@@ -316,8 +316,9 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
fput(dev->log_file);
dev->log_file = NULL;
/* No one will access memory at this point */
- kfree(dev->memory);
- dev->memory = NULL;
+ kfree(rcu_dereference_protected(dev->memory,
+ lockdep_is_held(&dev->mutex)));
+ RCU_INIT_POINTER(dev->memory, NULL);
if (dev->mm)
mmput(dev->mm);
dev->mm = NULL;
@@ -401,14 +402,22 @@ static int vq_access_ok(unsigned int num,
/* Caller should have device mutex but not vq mutex */
int vhost_log_access_ok(struct vhost_dev *dev)
{
- return memory_access_ok(dev, dev->memory, 1);
+ struct vhost_memory *mp;
+
+ mp = rcu_dereference_protected(dev->memory,
+ lockdep_is_held(&dev->mutex));
+ return memory_access_ok(dev, mp, 1);
}
/* Verify access for write logging. */
/* Caller should have vq mutex and device mutex */
static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
{
- return vq_memory_access_ok(log_base, vq->dev->memory,
+ struct vhost_memory *mp;
+
+ mp = rcu_dereference_protected(vq->dev->memory,
+ lockdep_is_held(&vq->mutex));
+ return vq_memory_access_ok(log_base, mp,
vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
(!vq->log_used || log_access_ok(log_base, vq->log_addr,
sizeof *vq->used +
@@ -448,7 +457,8 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
kfree(newmem);
return -EFAULT;
}
- oldmem = d->memory;
+ oldmem = rcu_dereference_protected(d->memory,
+ lockdep_is_held(&d->mutex));
rcu_assign_pointer(d->memory, newmem);
synchronize_rcu();
kfree(oldmem);
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index afd7729..af3c11d 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -106,7 +106,7 @@ struct vhost_virtqueue {
* vhost_work execution acts instead of rcu_read_lock() and the end of
* vhost_work execution acts instead of rcu_read_lock().
* Writers use virtqueue mutex. */
- void *private_data;
+ void __rcu *private_data;
/* Log write descriptors */
void __user *log_base;
struct vhost_log log[VHOST_NET_MAX_SG];
@@ -116,7 +116,7 @@ struct vhost_dev {
/* Readers use RCU to access memory table pointer
* log base pointer and features.
* Writers use mutex below.*/
- struct vhost_memory *memory;
+ struct vhost_memory __rcu *memory;
struct mm_struct *mm;
struct mutex mutex;
unsigned acked_features;
@@ -173,7 +173,11 @@ enum {
static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
{
- unsigned acked_features = rcu_dereference(dev->acked_features);
+ unsigned acked_features;
+
+ acked_features =
+ rcu_dereference_index_check(dev->acked_features,
+ lockdep_is_held(&dev->mutex));
return acked_features & (1 << bit);
}
diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c
index afe21e6..1c2c683 100644
--- a/drivers/video/amba-clcd.c
+++ b/drivers/video/amba-clcd.c
@@ -80,7 +80,10 @@ static void clcdfb_disable(struct clcd_fb *fb)
/*
* Disable CLCD clock source.
*/
- clk_disable(fb->clk);
+ if (fb->clk_enabled) {
+ fb->clk_enabled = false;
+ clk_disable(fb->clk);
+ }
}
static void clcdfb_enable(struct clcd_fb *fb, u32 cntl)
@@ -88,7 +91,10 @@ static void clcdfb_enable(struct clcd_fb *fb, u32 cntl)
/*
* Enable the CLCD clock source.
*/
- clk_enable(fb->clk);
+ if (!fb->clk_enabled) {
+ fb->clk_enabled = true;
+ clk_enable(fb->clk);
+ }
/*
* Bring up by first enabling..
diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h
index f3a4e15..f96a471 100644
--- a/drivers/video/matrox/matroxfb_base.h
+++ b/drivers/video/matrox/matroxfb_base.h
@@ -151,13 +151,13 @@ static inline void mga_writel(vaddr_t va, unsigned int offs, u_int32_t value) {
static inline void mga_memcpy_toio(vaddr_t va, const void* src, int len) {
#if defined(__alpha__) || defined(__i386__) || defined(__x86_64__)
/*
- * memcpy_toio works for us if:
+ * iowrite32_rep works for us if:
* (1) Copies data as 32bit quantities, not byte after byte,
* (2) Performs LE ordered stores, and
* (3) It copes with unaligned source (destination is guaranteed to be page
* aligned and length is guaranteed to be multiple of 4).
*/
- memcpy_toio(va.vaddr, src, len);
+ iowrite32_rep(va.vaddr, src, len >> 2);
#else
u_int32_t __iomem* addr = va.vaddr;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 9e60fd2..a7528b9 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -108,7 +108,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
Node *fmt;
struct file * interp_file = NULL;
char iname[BINPRM_BUF_SIZE];
- char *iname_addr = iname;
+ const char *iname_addr = iname;
int retval;
int fd_binary = -1;
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c
index aca9d55..396a988 100644
--- a/fs/binfmt_script.c
+++ b/fs/binfmt_script.c
@@ -16,7 +16,8 @@
static int load_script(struct linux_binprm *bprm,struct pt_regs *regs)
{
- char *cp, *i_name, *i_arg;
+ const char *i_arg, *i_name;
+ char *cp;
struct file *file;
char interp[BINPRM_BUF_SIZE];
int retval;
diff --git a/fs/buffer.c b/fs/buffer.c
index 50efa33..3e7dca2 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
spin_unlock(lock);
/*
* Ensure any pending I/O completes so that
- * ll_rw_block() actually writes the current
- * contents - it is a noop if I/O is still in
- * flight on potentially older contents.
+ * write_dirty_buffer() actually writes the
+ * current contents - it is a noop if I/O is
+ * still in flight on potentially older
+ * contents.
*/
- ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
+ write_dirty_buffer(bh, WRITE_SYNC_PLUG);
/*
* Kick off IO for the previous mapping. Note
@@ -2912,13 +2913,6 @@ int submit_bh(int rw, struct buffer_head * bh)
BUG_ON(buffer_unwritten(bh));
/*
- * Mask in barrier bit for a write (could be either a WRITE or a
- * WRITE_SYNC
- */
- if (buffer_ordered(bh) && (rw & WRITE))
- rw |= WRITE_BARRIER;
-
- /*
* Only clear out a write error when rewriting
*/
if (test_set_buffer_req(bh) && (rw & WRITE))
@@ -2956,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh);
/**
* ll_rw_block: low-level access to block devices (DEPRECATED)
- * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
* @nr: number of &struct buffer_heads in the array
* @bhs: array of pointers to &struct buffer_head
*
* ll_rw_block() takes an array of pointers to &struct buffer_heads, and
* requests an I/O operation on them, either a %READ or a %WRITE. The third
- * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
- * are sent to disk. The fourth %READA option is described in the documentation
- * for generic_make_request() which ll_rw_block() calls.
+ * %READA option is described in the documentation for generic_make_request()
+ * which ll_rw_block() calls.
*
* This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
- * clean when doing a write request, and any buffer that appears to be
- * up-to-date when doing read request. Further it marks as clean buffers that
- * are processed for writing (the buffer cache won't assume that they are
- * actually clean until the buffer gets unlocked).
+ * BH_Lock state bit), any buffer that appears to be clean when doing a write
+ * request, and any buffer that appears to be up-to-date when doing read
+ * request. Further it marks as clean buffers that are processed for
+ * writing (the buffer cache won't assume that they are actually clean
+ * until the buffer gets unlocked).
*
* ll_rw_block sets b_end_io to simple completion handler that marks
* the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -2987,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
for (i = 0; i < nr; i++) {
struct buffer_head *bh = bhs[i];
- if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
- lock_buffer(bh);
- else if (!trylock_buffer(bh))
+ if (!trylock_buffer(bh))
continue;
-
- if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
- rw == SWRITE_SYNC_PLUG) {
+ if (rw == WRITE) {
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
- if (rw == SWRITE_SYNC)
- submit_bh(WRITE_SYNC, bh);
- else
- submit_bh(WRITE, bh);
+ submit_bh(WRITE, bh);
continue;
}
} else {
@@ -3016,12 +3002,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
}
EXPORT_SYMBOL(ll_rw_block);
+void write_dirty_buffer(struct buffer_head *bh, int rw)
+{
+ lock_buffer(bh);
+ if (!test_clear_buffer_dirty(bh)) {
+ unlock_buffer(bh);
+ return;
+ }
+ bh->b_end_io = end_buffer_write_sync;
+ get_bh(bh);
+ submit_bh(rw, bh);
+}
+EXPORT_SYMBOL(write_dirty_buffer);
+
/*
* For a data-integrity writeout, we need to wait upon any in-progress I/O
* and then start new I/O and then wait upon it. The caller must have a ref on
* the buffer_head.
*/
-int sync_dirty_buffer(struct buffer_head *bh)
+int __sync_dirty_buffer(struct buffer_head *bh, int rw)
{
int ret = 0;
@@ -3030,7 +3029,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
if (test_clear_buffer_dirty(bh)) {
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
- ret = submit_bh(WRITE_SYNC, bh);
+ ret = submit_bh(rw, bh);
wait_on_buffer(bh);
if (buffer_eopnotsupp(bh)) {
clear_buffer_eopnotsupp(bh);
@@ -3043,6 +3042,12 @@ int sync_dirty_buffer(struct buffer_head *bh)
}
return ret;
}
+EXPORT_SYMBOL(__sync_dirty_buffer);
+
+int sync_dirty_buffer(struct buffer_head *bh)
+{
+ return __sync_dirty_buffer(bh, WRITE_SYNC);
+}
EXPORT_SYMBOL(sync_dirty_buffer);
/*
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a53b130..1e7a330 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -80,7 +80,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
}
} else {
inode = iget_locked(sb, CRAMINO(cramfs_inode));
- if (inode) {
+ if (inode && (inode->i_state & I_NEW)) {
setup_inode(inode, cramfs_inode);
unlock_new_inode(inode);
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 4d13bf5..83293be 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1332,31 +1332,13 @@ EXPORT_SYMBOL(d_add_ci);
* d_lookup - search for a dentry
* @parent: parent dentry
* @name: qstr of name we wish to find
+ * Returns: dentry, or NULL
*
- * Searches the children of the parent dentry for the name in question. If
- * the dentry is found its reference count is incremented and the dentry
- * is returned. The caller must use dput to free the entry when it has
- * finished using it. %NULL is returned on failure.
- *
- * __d_lookup is dcache_lock free. The hash list is protected using RCU.
- * Memory barriers are used while updating and doing lockless traversal.
- * To avoid races with d_move while rename is happening, d_lock is used.
- *
- * Overflows in memcmp(), while d_move, are avoided by keeping the length
- * and name pointer in one structure pointed by d_qstr.
- *
- * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
- * lookup is going on.
- *
- * The dentry unused LRU is not updated even if lookup finds the required dentry
- * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
- * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
- * acquisition.
- *
- * d_lookup() is protected against the concurrent renames in some unrelated
- * directory using the seqlockt_t rename_lock.
+ * d_lookup searches the children of the parent dentry for the name in
+ * question. If the dentry is found its reference count is incremented and the
+ * dentry is returned. The caller must use dput to free the entry when it has
+ * finished using it. %NULL is returned if the dentry does not exist.
*/
-
struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{
struct dentry * dentry = NULL;
@@ -1372,6 +1354,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
}
EXPORT_SYMBOL(d_lookup);
+/*
+ * __d_lookup - search for a dentry (racy)
+ * @parent: parent dentry
+ * @name: qstr of name we wish to find
+ * Returns: dentry, or NULL
+ *
+ * __d_lookup is like d_lookup, however it may (rarely) return a
+ * false-negative result due to unrelated rename activity.
+ *
+ * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
+ * however it must be used carefully, eg. with a following d_lookup in
+ * the case of failure.
+ *
+ * __d_lookup callers must be commented.
+ */
struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
unsigned int len = name->len;
@@ -1382,6 +1379,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
struct hlist_node *node;
struct dentry *dentry;
+ /*
+ * The hash list is protected using RCU.
+ *
+ * Take d_lock when comparing a candidate dentry, to avoid races
+ * with d_move().
+ *
+ * It is possible that concurrent renames can mess up our list
+ * walk here and result in missing our dentry, resulting in the
+ * false-negative result. d_lookup() protects against concurrent
+ * renames using rename_lock seqlock.
+ *
+ * See Documentation/vfs/dcache-locking.txt for more details.
+ */
rcu_read_lock();
hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
@@ -1396,8 +1406,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
/*
* Recheck the dentry after taking the lock - d_move may have
- * changed things. Don't bother checking the hash because we're
- * about to compare the whole name anyway.
+ * changed things. Don't bother checking the hash because
+ * we're about to compare the whole name anyway.
*/
if (dentry->d_parent != parent)
goto next;
@@ -1925,7 +1935,7 @@ static int prepend_path(const struct path *path, struct path *root,
bool slash = false;
int error = 0;
- spin_lock(&vfsmount_lock);
+ br_read_lock(vfsmount_lock);
while (dentry != root->dentry || vfsmnt != root->mnt) {
struct dentry * parent;
@@ -1954,7 +1964,7 @@ out:
if (!error && !slash)
error = prepend(buffer, buflen, "/", 1);
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
return error;
global_root:
@@ -2292,11 +2302,12 @@ int path_is_under(struct path *path1, struct path *path2)
struct vfsmount *mnt = path1->mnt;
struct dentry *dentry = path1->dentry;
int res;
- spin_lock(&vfsmount_lock);
+
+ br_read_lock(vfsmount_lock);
if (mnt != path2->mnt) {
for (;;) {
if (mnt->mnt_parent == mnt) {
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
return 0;
}
if (mnt->mnt_parent == path2->mnt)
@@ -2306,7 +2317,7 @@ int path_is_under(struct path *path1, struct path *path2)
dentry = mnt->mnt_mountpoint;
}
res = is_subdir(dentry, path2->dentry);
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
return res;
}
EXPORT_SYMBOL(path_is_under);
diff --git a/fs/exec.c b/fs/exec.c
index 7761837..2d94552 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -361,13 +361,13 @@ err:
/*
* count() counts the number of strings in array ARGV.
*/
-static int count(char __user * __user * argv, int max)
+static int count(const char __user * const __user * argv, int max)
{
int i = 0;
if (argv != NULL) {
for (;;) {
- char __user * p;
+ const char __user * p;
if (get_user(p, argv))
return -EFAULT;
@@ -387,7 +387,7 @@ static int count(char __user * __user * argv, int max)
* processes's memory to the new process's stack. The call to get_user_pages()
* ensures the destination page is created and not swapped out.
*/
-static int copy_strings(int argc, char __user * __user * argv,
+static int copy_strings(int argc, const char __user *const __user *argv,
struct linux_binprm *bprm)
{
struct page *kmapped_page = NULL;
@@ -396,7 +396,7 @@ static int copy_strings(int argc, char __user * __user * argv,
int ret;
while (argc-- > 0) {
- char __user *str;
+ const char __user *str;
int len;
unsigned long pos;
@@ -470,12 +470,13 @@ out:
/*
* Like copy_strings, but get argv and its values from kernel memory.
*/
-int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
+int copy_strings_kernel(int argc, const char *const *argv,
+ struct linux_binprm *bprm)
{
int r;
mm_segment_t oldfs = get_fs();
set_fs(KERNEL_DS);
- r = copy_strings(argc, (char __user * __user *)argv, bprm);
+ r = copy_strings(argc, (const char __user *const __user *)argv, bprm);
set_fs(oldfs);
return r;
}
@@ -997,7 +998,7 @@ EXPORT_SYMBOL(flush_old_exec);
void setup_new_exec(struct linux_binprm * bprm)
{
int i, ch;
- char * name;
+ const char *name;
char tcomm[sizeof(current->comm)];
arch_pick_mmap_layout(current->mm);
@@ -1117,7 +1118,7 @@ int check_unsafe_exec(struct linux_binprm *bprm)
bprm->unsafe = tracehook_unsafe_exec(p);
n_fs = 1;
- write_lock(&p->fs->lock);
+ spin_lock(&p->fs->lock);
rcu_read_lock();
for (t = next_thread(p); t != p; t = next_thread(t)) {
if (t->fs == p->fs)
@@ -1134,7 +1135,7 @@ int check_unsafe_exec(struct linux_binprm *bprm)
res = 1;
}
}
- write_unlock(&p->fs->lock);
+ spin_unlock(&p->fs->lock);
return res;
}
@@ -1316,9 +1317,9 @@ EXPORT_SYMBOL(search_binary_handler);
/*
* sys_execve() executes a new program.
*/
-int do_execve(char * filename,
- char __user *__user *argv,
- char __user *__user *envp,
+int do_execve(const char * filename,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp,
struct pt_regs * regs)
{
struct linux_binprm *bprm;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 1fa23f6..1736f23 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
{
int i, err = 0;
- ll_rw_block(SWRITE, nr_bhs, bhs);
+ for (i = 0; i < nr_bhs; i++)
+ write_dirty_buffer(bhs[i], WRITE);
+
for (i = 0; i < nr_bhs; i++) {
wait_on_buffer(bhs[i]);
if (buffer_eopnotsupp(bhs[i])) {
diff --git a/fs/file_table.c b/fs/file_table.c
index edecd36..a04bdd8 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -20,7 +20,9 @@
#include
#include
#include
+#include
#include
+#include
#include
#include
@@ -32,8 +34,8 @@ struct files_stat_struct files_stat = {
.max_files = NR_FILE
};
-/* public. Not pretty! */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
+DECLARE_LGLOCK(files_lglock);
+DEFINE_LGLOCK(files_lglock);
/* SLAB cache for file structures */
static struct kmem_cache *filp_cachep __read_mostly;
@@ -249,7 +251,7 @@ static void __fput(struct file *file)
cdev_put(inode->i_cdev);
fops_put(file->f_op);
put_pid(file->f_owner.pid);
- file_kill(file);
+ file_sb_list_del(file);
if (file->f_mode & FMODE_WRITE)
drop_file_write_access(file);
file->f_path.dentry = NULL;
@@ -328,41 +330,107 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
return file;
}
-
void put_filp(struct file *file)
{
if (atomic_long_dec_and_test(&file->f_count)) {
security_file_free(file);
- file_kill(file);
+ file_sb_list_del(file);
file_free(file);
}
}
-void file_move(struct file *file, struct list_head *list)
+static inline int file_list_cpu(struct file *file)
{
- if (!list)
- return;
- file_list_lock();
- list_move(&file->f_u.fu_list, list);
- file_list_unlock();
+#ifdef CONFIG_SMP
+ return file->f_sb_list_cpu;
+#else
+ return smp_processor_id();
+#endif
+}
+
+/* helper for file_sb_list_add to reduce ifdefs */
+static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
+{
+ struct list_head *list;
+#ifdef CONFIG_SMP
+ int cpu;
+ cpu = smp_processor_id();
+ file->f_sb_list_cpu = cpu;
+ list = per_cpu_ptr(sb->s_files, cpu);
+#else
+ list = &sb->s_files;
+#endif
+ list_add(&file->f_u.fu_list, list);
}
-void file_kill(struct file *file)
+/**
+ * file_sb_list_add - add a file to the sb's file list
+ * @file: file to add
+ * @sb: sb to add it to
+ *
+ * Use this function to associate a file with the superblock of the inode it
+ * refers to.
+ */
+void file_sb_list_add(struct file *file, struct super_block *sb)
+{
+ lg_local_lock(files_lglock);
+ __file_sb_list_add(file, sb);
+ lg_local_unlock(files_lglock);
+}
+
+/**
+ * file_sb_list_del - remove a file from the sb's file list
+ * @file: file to remove
+ *
+ * Use this function to remove a file from its superblock's file list.
+ * Nothing is done if the file is not currently on a list.
+ */
+void file_sb_list_del(struct file *file)
{
if (!list_empty(&file->f_u.fu_list)) {
- file_list_lock();
+ lg_local_lock_cpu(files_lglock, file_list_cpu(file));
list_del_init(&file->f_u.fu_list);
- file_list_unlock();
+ lg_local_unlock_cpu(files_lglock, file_list_cpu(file));
}
}
+#ifdef CONFIG_SMP
+
+/*
+ * These macros iterate all files on all CPUs for a given superblock.
+ * files_lglock must be held globally.
+ */
+#define do_file_list_for_each_entry(__sb, __file) \
+{ \
+ int i; \
+ for_each_possible_cpu(i) { \
+ struct list_head *list; \
+ list = per_cpu_ptr((__sb)->s_files, i); \
+ list_for_each_entry((__file), list, f_u.fu_list)
+
+#define while_file_list_for_each_entry \
+ } \
+}
+
+#else
+
+#define do_file_list_for_each_entry(__sb, __file) \
+{ \
+ struct list_head *list; \
+ list = &(__sb)->s_files; /* UP: single list embedded in the sb */ \
+ list_for_each_entry((__file), list, f_u.fu_list)
+
+#define while_file_list_for_each_entry \
+}
+
+#endif
+
int fs_may_remount_ro(struct super_block *sb)
{
struct file *file;
-
/* Check that no files are currently opened for writing. */
- file_list_lock();
- list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
+ lg_global_lock(files_lglock);
+ do_file_list_for_each_entry(sb, file) {
struct inode *inode = file->f_path.dentry->d_inode;
/* File with pending delete? */
@@ -372,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb)
/* Writeable file? */
if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
goto too_bad;
- }
- file_list_unlock();
+ } while_file_list_for_each_entry;
+ lg_global_unlock(files_lglock);
return 1; /* Tis' cool bro. */
too_bad:
- file_list_unlock();
+ lg_global_unlock(files_lglock);
return 0;
}
@@ -392,8 +460,8 @@ void mark_files_ro(struct super_block *sb)
struct file *f;
retry:
- file_list_lock();
- list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
+ lg_global_lock(files_lglock);
+ do_file_list_for_each_entry(sb, f) {
struct vfsmount *mnt;
if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
continue;
@@ -408,16 +476,13 @@ retry:
continue;
file_release_write(f);
mnt = mntget(f->f_path.mnt);
- file_list_unlock();
- /*
- * This can sleep, so we can't hold
- * the file_list_lock() spinlock.
- */
+ /* This can sleep, so we can't hold the spinlock. */
+ lg_global_unlock(files_lglock);
mnt_drop_write(mnt);
mntput(mnt);
goto retry;
- }
- file_list_unlock();
+ } while_file_list_for_each_entry;
+ lg_global_unlock(files_lglock);
}
void __init files_init(unsigned long mempages)
@@ -437,5 +502,6 @@ void __init files_init(unsigned long mempages)
if (files_stat.max_files < NR_FILE)
files_stat.max_files = NR_FILE;
files_defer_init();
+ lg_lock_init(files_lglock);
percpu_counter_init(&nr_files, 0);
}
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 1ee40eb..ed45a9c 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
{
struct path old_root;
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
old_root = fs->root;
fs->root = *path;
path_get(path);
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
if (old_root.dentry)
path_put(&old_root);
}
@@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
{
struct path old_pwd;
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
old_pwd = fs->pwd;
fs->pwd = *path;
path_get(path);
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
if (old_pwd.dentry)
path_put(&old_pwd);
@@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
task_lock(p);
fs = p->fs;
if (fs) {
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
if (fs->root.dentry == old_root->dentry
&& fs->root.mnt == old_root->mnt) {
path_get(new_root);
@@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
fs->pwd = *new_root;
count++;
}
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
}
task_unlock(p);
} while_each_thread(g, p);
@@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk)
if (fs) {
int kill;
task_lock(tsk);
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
tsk->fs = NULL;
kill = !--fs->users;
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
task_unlock(tsk);
if (kill)
free_fs_struct(fs);
@@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
if (fs) {
fs->users = 1;
fs->in_exec = 0;
- rwlock_init(&fs->lock);
+ spin_lock_init(&fs->lock);
fs->umask = old->umask;
get_fs_root_and_pwd(old, &fs->root, &fs->pwd);
}
@@ -121,10 +121,10 @@ int unshare_fs_struct(void)
return -ENOMEM;
task_lock(current);
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
kill = !--fs->users;
current->fs = new_fs;
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
task_unlock(current);
if (kill)
@@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask);
/* to be mentioned only in INIT_TASK */
struct fs_struct init_fs = {
.users = 1,
- .lock = __RW_LOCK_UNLOCKED(init_fs.lock),
+ .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock),
.umask = 0022,
};
@@ -156,14 +156,14 @@ void daemonize_fs_struct(void)
task_lock(current);
- write_lock(&init_fs.lock);
+ spin_lock(&init_fs.lock);
init_fs.users++;
- write_unlock(&init_fs.lock);
+ spin_unlock(&init_fs.lock);
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
current->fs = &init_fs;
kill = !--fs->users;
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
task_unlock(current);
if (kill)
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 99800e5..6bc9e3a 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -94,6 +94,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value,
if (error < 0)
goto failed;
inode->i_mode = mode;
+ inode->i_ctime = CURRENT_TIME;
if (error == 0) {
posix_acl_release(acl);
acl = NULL;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index dd1e555..f7dc9b5 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -104,7 +104,7 @@ static char *__dentry_name(struct dentry *dentry, char *name)
__putname(name);
return NULL;
}
- strncpy(name, root, PATH_MAX);
+ strlcpy(name, root, PATH_MAX);
if (len > p - name) {
__putname(name);
return NULL;
@@ -876,7 +876,7 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
char *path = dentry_name(dentry);
int err = -ENOMEM;
if (path) {
- int err = hostfs_do_readlink(path, link, PATH_MAX);
+ err = hostfs_do_readlink(path, link, PATH_MAX);
if (err == PATH_MAX)
err = -E2BIG;
__putname(path);
diff --git a/fs/internal.h b/fs/internal.h
index 6b706bc..a6910e9 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#include
+
struct super_block;
struct linux_binprm;
struct path;
@@ -70,7 +72,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
extern void __init mnt_init(void);
-extern spinlock_t vfsmount_lock;
+DECLARE_BRLOCK(vfsmount_lock);
+
/*
* fs_struct.c
@@ -80,6 +83,8 @@ extern void chroot_fs_refs(struct path *, struct path *);
/*
* file_table.c
*/
+extern void file_sb_list_add(struct file *f, struct super_block *sb);
+extern void file_sb_list_del(struct file *f);
extern void mark_files_ro(struct super_block *);
extern struct file *get_empty_filp(void);
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index b0435dd..05a38b9 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
{
int i;
- ll_rw_block(SWRITE, *batch_count, bhs);
+ for (i = 0; i < *batch_count; i++)
+ write_dirty_buffer(bhs[i], WRITE);
+
for (i = 0; i < *batch_count; i++) {
struct buffer_head *bh = bhs[i];
clear_buffer_jwrite(bh);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 28a9dda..95d8c11 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal,
struct buffer_head *bh;
journal_header_t *header;
int ret;
- int barrier_done = 0;
if (is_journal_aborted(journal))
return 0;
@@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal,
JBUFFER_TRACE(descriptor, "write commit block");
set_buffer_dirty(bh);
+
if (journal->j_flags & JFS_BARRIER) {
- set_buffer_ordered(bh);
- barrier_done = 1;
- }
- ret = sync_dirty_buffer(bh);
- if (barrier_done)
- clear_buffer_ordered(bh);
- /* is it possible for another commit to fail at roughly
- * the same time as this one? If so, we don't want to
- * trust the barrier flag in the super, but instead want
- * to remember if we sent a barrier request
- */
- if (ret == -EOPNOTSUPP && barrier_done) {
- char b[BDEVNAME_SIZE];
+ ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER);
- printk(KERN_WARNING
- "JBD: barrier-based sync failed on %s - "
- "disabling barriers\n",
- bdevname(journal->j_dev, b));
- spin_lock(&journal->j_state_lock);
- journal->j_flags &= ~JFS_BARRIER;
- spin_unlock(&journal->j_state_lock);
+ /*
+ * Is it possible for another commit to fail at roughly
+ * the same time as this one? If so, we don't want to
+ * trust the barrier flag in the super, but instead want
+ * to remember if we sent a barrier request
+ */
+ if (ret == -EOPNOTSUPP) {
+ char b[BDEVNAME_SIZE];
- /* And try again, without the barrier */
- set_buffer_uptodate(bh);
- set_buffer_dirty(bh);
+ printk(KERN_WARNING
+ "JBD: barrier-based sync failed on %s - "
+ "disabling barriers\n",
+ bdevname(journal->j_dev, b));
+ spin_lock(&journal->j_state_lock);
+ journal->j_flags &= ~JFS_BARRIER;
+ spin_unlock(&journal->j_state_lock);
+
+ /* And try again, without the barrier */
+ set_buffer_uptodate(bh);
+ set_buffer_dirty(bh);
+ ret = sync_dirty_buffer(bh);
+ }
+ } else {
ret = sync_dirty_buffer(bh);
}
+
put_bh(bh); /* One for getblk() */
journal_put_journal_head(descriptor);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f19ce94..2c4b1f1 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait)
if (wait)
sync_dirty_buffer(bh);
else
- ll_rw_block(SWRITE, 1, &bh);
+ write_dirty_buffer(bh, WRITE);
out:
/* If we have just flushed the log (by marking s_start==0), then
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index ad71732..d290183 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal,
set_buffer_jwrite(bh);
BUFFER_TRACE(bh, "write");
set_buffer_dirty(bh);
- ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
+ write_dirty_buffer(bh, write_op);
}
#endif
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 1c23a0f..5247e7f 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count)
{
int i;
- ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs);
+ for (i = 0; i < *batch_count; i++)
+ write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE);
+
for (i = 0; i < *batch_count; i++) {
struct buffer_head *bh = journal->j_chkpt_bhs[i];
clear_buffer_jwrite(bh);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f52e5e8..7c068c1 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal,
struct commit_header *tmp;
struct buffer_head *bh;
int ret;
- int barrier_done = 0;
struct timespec now = current_kernel_time();
if (is_journal_aborted(journal))
@@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal,
if (journal->j_flags & JBD2_BARRIER &&
!JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
- set_buffer_ordered(bh);
- barrier_done = 1;
- }
- ret = submit_bh(WRITE_SYNC_PLUG, bh);
- if (barrier_done)
- clear_buffer_ordered(bh);
-
- /* is it possible for another commit to fail at roughly
- * the same time as this one? If so, we don't want to
- * trust the barrier flag in the super, but instead want
- * to remember if we sent a barrier request
- */
- if (ret == -EOPNOTSUPP && barrier_done) {
- printk(KERN_WARNING
- "JBD2: Disabling barriers on %s, "
- "not supported by device\n", journal->j_devname);
- write_lock(&journal->j_state_lock);
- journal->j_flags &= ~JBD2_BARRIER;
- write_unlock(&journal->j_state_lock);
+ ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh);
+ if (ret == -EOPNOTSUPP) {
+ printk(KERN_WARNING
+ "JBD2: Disabling barriers on %s, "
+ "not supported by device\n", journal->j_devname);
+ write_lock(&journal->j_state_lock);
+ journal->j_flags &= ~JBD2_BARRIER;
+ write_unlock(&journal->j_state_lock);
- /* And try again, without the barrier */
- lock_buffer(bh);
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
+ /* And try again, without the barrier */
+ lock_buffer(bh);
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
+ ret = submit_bh(WRITE_SYNC_PLUG, bh);
+ }
+ } else {
ret = submit_bh(WRITE_SYNC_PLUG, bh);
}
*cbh = bh;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ad5866a..0e8014e 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
set_buffer_uptodate(bh);
}
} else
- ll_rw_block(SWRITE, 1, &bh);
+ write_dirty_buffer(bh, WRITE);
out:
/* If we have just flushed the log (by marking s_start==0), then
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index a360b06..9ad321f 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal,
set_buffer_jwrite(bh);
BUFFER_TRACE(bh, "write");
set_buffer_dirty(bh);
- ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
+ write_dirty_buffer(bh, write_op);
}
#endif
diff --git a/fs/mbcache.c b/fs/mbcache.c
index cf4e6cd..9344474 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -80,6 +80,7 @@ struct mb_cache {
struct list_head c_cache_list;
const char *c_name;
atomic_t c_entry_count;
+ int c_max_entries;
int c_bucket_bits;
struct kmem_cache *c_entry_cache;
struct list_head *c_block_hash;
@@ -243,6 +244,12 @@ mb_cache_create(const char *name, int bucket_bits)
if (!cache->c_entry_cache)
goto fail2;
+ /*
+ * Set an upper limit on the number of cache entries so that the hash
+ * chains won't grow too long.
+ */
+ cache->c_max_entries = bucket_count << 4;
+
spin_lock(&mb_cache_spinlock);
list_add(&cache->c_cache_list, &mb_cache_list);
spin_unlock(&mb_cache_spinlock);
@@ -333,7 +340,6 @@ mb_cache_destroy(struct mb_cache *cache)
kfree(cache);
}
-
/*
* mb_cache_entry_alloc()
*
@@ -345,17 +351,29 @@ mb_cache_destroy(struct mb_cache *cache)
struct mb_cache_entry *
mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
{
- struct mb_cache_entry *ce;
-
- ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
- if (ce) {
+ struct mb_cache_entry *ce = NULL;
+
+ if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
+ spin_lock(&mb_cache_spinlock);
+ if (!list_empty(&mb_cache_lru_list)) {
+ ce = list_entry(mb_cache_lru_list.next,
+ struct mb_cache_entry, e_lru_list);
+ list_del_init(&ce->e_lru_list);
+ __mb_cache_entry_unhash(ce);
+ }
+ spin_unlock(&mb_cache_spinlock);
+ }
+ if (!ce) {
+ ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
+ if (!ce)
+ return NULL;
atomic_inc(&cache->c_entry_count);
INIT_LIST_HEAD(&ce->e_lru_list);
INIT_LIST_HEAD(&ce->e_block_list);
ce->e_cache = cache;
- ce->e_used = 1 + MB_CACHE_WRITER;
ce->e_queued = 0;
}
+ ce->e_used = 1 + MB_CACHE_WRITER;
return ce;
}
diff --git a/fs/namei.c b/fs/namei.c
index 17ea76b..24896e8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -595,15 +595,16 @@ int follow_up(struct path *path)
{
struct vfsmount *parent;
struct dentry *mountpoint;
- spin_lock(&vfsmount_lock);
+
+ br_read_lock(vfsmount_lock);
parent = path->mnt->mnt_parent;
if (parent == path->mnt) {
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
return 0;
}
mntget(parent);
mountpoint = dget(path->mnt->mnt_mountpoint);
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
dput(path->dentry);
path->dentry = mountpoint;
mntput(path->mnt);
@@ -686,6 +687,35 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
}
/*
+ * Allocate a dentry with name and parent and run the parent directory's
+ * ->lookup on it. Returns the new dentry (or the one ->lookup returned in
+ * its place), or ERR_PTR on error. parent->d_inode->i_mutex must be held,
+ * and d_lookup must have verified under it that no child exists.
+ */
+static struct dentry *d_alloc_and_lookup(struct dentry *parent,
+ struct qstr *name, struct nameidata *nd)
+{
+ struct inode *inode = parent->d_inode;
+ struct dentry *dentry;
+ struct dentry *old;
+
+ /* Don't create child dentry for a dead directory. */
+ if (unlikely(IS_DEADDIR(inode)))
+ return ERR_PTR(-ENOENT);
+
+ dentry = d_alloc(parent, name);
+ if (unlikely(!dentry))
+ return ERR_PTR(-ENOMEM);
+
+ old = inode->i_op->lookup(inode, dentry, nd);
+ if (unlikely(old)) {
+ dput(dentry);
+ dentry = old;
+ }
+ return dentry;
+}
+
+/*
* It's more convoluted than I'd like it to be, but... it's still fairly
* small and for now I'd prefer to have fast path as straight as possible.
* It _is_ time-critical.
@@ -706,9 +736,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
return err;
}
+ /*
+ * Rename seqlock is not required here because in the off chance
+ * of a false negative due to a concurrent rename, we're going to
+ * do the non-racy lookup, below.
+ */
dentry = __d_lookup(nd->path.dentry, name);
if (!dentry)
goto need_lookup;
+found:
if (dentry->d_op && dentry->d_op->d_revalidate)
goto need_revalidate;
done:
@@ -724,56 +760,28 @@ need_lookup:
mutex_lock(&dir->i_mutex);
/*
* First re-do the cached lookup just in case it was created
- * while we waited for the directory semaphore..
+ * while we waited for the directory semaphore, or the first
+ * lookup failed due to an unrelated rename.
*
- * FIXME! This could use version numbering or similar to
- * avoid unnecessary cache lookups.
- *
- * The "dcache_lock" is purely to protect the RCU list walker
- * from concurrent renames at this point (we mustn't get false
- * negatives from the RCU list walk here, unlike the optimistic
- * fast walk).
- *
- * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
+ * This could use version numbering or similar to avoid unnecessary
+ * cache lookups, but then we'd have to do the first lookup in the
+ * non-racy way. However in the common case here, everything should
+ * be hot in cache, so would it be a big win?
*/
dentry = d_lookup(parent, name);
- if (!dentry) {
- struct dentry *new;
-
- /* Don't create child dentry for a dead directory. */
- dentry = ERR_PTR(-ENOENT);
- if (IS_DEADDIR(dir))
- goto out_unlock;
-
- new = d_alloc(parent, name);
- dentry = ERR_PTR(-ENOMEM);
- if (new) {
- dentry = dir->i_op->lookup(dir, new, nd);
- if (dentry)
- dput(new);
- else
- dentry = new;
- }
-out_unlock:
+ if (likely(!dentry)) {
+ dentry = d_alloc_and_lookup(parent, name, nd);
mutex_unlock(&dir->i_mutex);
if (IS_ERR(dentry))
goto fail;
goto done;
}
-
/*
* Uhhuh! Nasty case: the cache was re-populated while
* we waited on the semaphore. Need to revalidate.
*/
mutex_unlock(&dir->i_mutex);
- if (dentry->d_op && dentry->d_op->d_revalidate) {
- dentry = do_revalidate(dentry, nd);
- if (!dentry)
- dentry = ERR_PTR(-ENOENT);
- }
- if (IS_ERR(dentry))
- goto fail;
- goto done;
+ goto found;
need_revalidate:
dentry = do_revalidate(dentry, nd);
@@ -1130,35 +1138,18 @@ static struct dentry *__lookup_hash(struct qstr *name,
goto out;
}
- dentry = __d_lookup(base, name);
-
- /* lockess __d_lookup may fail due to concurrent d_move()
- * in some unrelated directory, so try with d_lookup
+ /*
+ * Don't bother with __d_lookup: callers are for creat as
+ * well as unlink, so a lot of the time it would cost
+ * a double lookup.
*/
- if (!dentry)
- dentry = d_lookup(base, name);
+ dentry = d_lookup(base, name);
if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
dentry = do_revalidate(dentry, nd);
- if (!dentry) {
- struct dentry *new;
-
- /* Don't create child dentry for a dead directory. */
- dentry = ERR_PTR(-ENOENT);
- if (IS_DEADDIR(inode))
- goto out;
-
- new = d_alloc(base, name);
- dentry = ERR_PTR(-ENOMEM);
- if (!new)
- goto out;
- dentry = inode->i_op->lookup(inode, new, nd);
- if (!dentry)
- dentry = new;
- else
- dput(new);
- }
+ if (!dentry)
+ dentry = d_alloc_and_lookup(base, name, nd);
out:
return dentry;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 2e10cb1..de402eb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -11,6 +11,8 @@
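#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/kernel.h>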
@@ -38,12 +40,10 @@
#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
#define HASH_SIZE (1UL << HASH_SHIFT)
-/* spinlock for vfsmount related operations, inplace of dcache_lock */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
-
static int event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
+static DEFINE_SPINLOCK(mnt_id_lock);
static int mnt_id_start = 0;
static int mnt_group_start = 1;
@@ -55,6 +55,16 @@ static struct rw_semaphore namespace_sem;
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);
+/*
+ * vfsmount lock may be taken for read to prevent changes to the
+ * vfsmount hash, i.e. during mountpoint lookups or walking back
+ * up the tree.
+ *
+ * It should be taken for write in all cases where the vfsmount
+ * tree or hash is modified or when a vfsmount structure is modified.
+ */
+DEFINE_BRLOCK(vfsmount_lock);
+
static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
@@ -65,18 +75,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
-/* allocation is serialized by namespace_sem */
+/*
+ * allocation is serialized by namespace_sem, but we need the spinlock to
+ * serialize with freeing.
+ */
static int mnt_alloc_id(struct vfsmount *mnt)
{
int res;
retry:
ida_pre_get(&mnt_id_ida, GFP_KERNEL);
- spin_lock(&vfsmount_lock);
+ spin_lock(&mnt_id_lock);
res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
if (!res)
mnt_id_start = mnt->mnt_id + 1;
- spin_unlock(&vfsmount_lock);
+ spin_unlock(&mnt_id_lock);
if (res == -EAGAIN)
goto retry;
@@ -86,11 +99,11 @@ retry:
static void mnt_free_id(struct vfsmount *mnt)
{
int id = mnt->mnt_id;
- spin_lock(&vfsmount_lock);
+ spin_lock(&mnt_id_lock);
ida_remove(&mnt_id_ida, id);
if (mnt_id_start > id)
mnt_id_start = id;
- spin_unlock(&vfsmount_lock);
+ spin_unlock(&mnt_id_lock);
}
/*
@@ -348,7 +361,7 @@ static int mnt_make_readonly(struct vfsmount *mnt)
{
int ret = 0;
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
mnt->mnt_flags |= MNT_WRITE_HOLD;
/*
* After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -382,15 +395,15 @@ static int mnt_make_readonly(struct vfsmount *mnt)
*/
smp_wmb();
mnt->mnt_flags &= ~MNT_WRITE_HOLD;
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
return ret;
}
static void __mnt_unmake_readonly(struct vfsmount *mnt)
{
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
mnt->mnt_flags &= ~MNT_READONLY;
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
}
void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
@@ -414,6 +427,7 @@ void free_vfsmnt(struct vfsmount *mnt)
/*
* find the first or last mount at @dentry on vfsmount @mnt depending on
* @dir. If @dir is set return the first mount else return the last mount.
+ * vfsmount_lock must be held for read or write.
*/
struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
int dir)
@@ -443,10 +457,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
struct vfsmount *lookup_mnt(struct path *path)
{
struct vfsmount *child_mnt;
- spin_lock(&vfsmount_lock);
+
+ br_read_lock(vfsmount_lock);
if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
mntget(child_mnt);
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
return child_mnt;
}
@@ -455,6 +470,9 @@ static inline int check_mnt(struct vfsmount *mnt)
return mnt->mnt_ns == current->nsproxy->mnt_ns;
}
+/*
+ * vfsmount lock must be held for write
+ */
static void touch_mnt_namespace(struct mnt_namespace *ns)
{
if (ns) {
@@ -463,6 +481,9 @@ static void touch_mnt_namespace(struct mnt_namespace *ns)
}
}
+/*
+ * vfsmount lock must be held for write
+ */
static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
if (ns && ns->event != event) {
@@ -471,6 +492,9 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
}
}
+/*
+ * vfsmount lock must be held for write
+ */
static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
{
old_path->dentry = mnt->mnt_mountpoint;
@@ -482,6 +506,9 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
old_path->dentry->d_mounted--;
}
+/*
+ * vfsmount lock must be held for write
+ */
void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
struct vfsmount *child_mnt)
{
@@ -490,6 +517,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
dentry->d_mounted++;
}
+/*
+ * vfsmount lock must be held for write
+ */
static void attach_mnt(struct vfsmount *mnt, struct path *path)
{
mnt_set_mountpoint(path->mnt, path->dentry, mnt);
@@ -499,7 +529,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path)
}
/*
- * the caller must hold vfsmount_lock
+ * vfsmount lock must be held for write
*/
static void commit_tree(struct vfsmount *mnt)
{
@@ -623,39 +653,43 @@ static inline void __mntput(struct vfsmount *mnt)
void mntput_no_expire(struct vfsmount *mnt)
{
repeat:
- if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
- if (likely(!mnt->mnt_pinned)) {
- spin_unlock(&vfsmount_lock);
- __mntput(mnt);
- return;
- }
- atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
- mnt->mnt_pinned = 0;
- spin_unlock(&vfsmount_lock);
- acct_auto_close_mnt(mnt);
- goto repeat;
+ if (atomic_add_unless(&mnt->mnt_count, -1, 1))
+ return;
+ br_write_lock(vfsmount_lock);
+ if (!atomic_dec_and_test(&mnt->mnt_count)) {
+ br_write_unlock(vfsmount_lock);
+ return;
+ }
+ if (likely(!mnt->mnt_pinned)) {
+ br_write_unlock(vfsmount_lock);
+ __mntput(mnt);
+ return;
}
+ atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
+ mnt->mnt_pinned = 0;
+ br_write_unlock(vfsmount_lock);
+ acct_auto_close_mnt(mnt);
+ goto repeat;
}
-
EXPORT_SYMBOL(mntput_no_expire);
void mnt_pin(struct vfsmount *mnt)
{
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
mnt->mnt_pinned++;
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
}
EXPORT_SYMBOL(mnt_pin);
void mnt_unpin(struct vfsmount *mnt)
{
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
if (mnt->mnt_pinned) {
atomic_inc(&mnt->mnt_count);
mnt->mnt_pinned--;
}
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
}
EXPORT_SYMBOL(mnt_unpin);
@@ -746,12 +780,12 @@ int mnt_had_events(struct proc_mounts *p)
struct mnt_namespace *ns = p->ns;
int res = 0;
- spin_lock(&vfsmount_lock);
+ br_read_lock(vfsmount_lock);
if (p->event != ns->event) {
p->event = ns->event;
res = 1;
}
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
return res;
}
@@ -952,12 +986,12 @@ int may_umount_tree(struct vfsmount *mnt)
int minimum_refs = 0;
struct vfsmount *p;
- spin_lock(&vfsmount_lock);
+ br_read_lock(vfsmount_lock);
for (p = mnt; p; p = next_mnt(p, mnt)) {
actual_refs += atomic_read(&p->mnt_count);
minimum_refs += 2;
}
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
if (actual_refs > minimum_refs)
return 0;
@@ -984,10 +1018,10 @@ int may_umount(struct vfsmount *mnt)
{
int ret = 1;
down_read(&namespace_sem);
- spin_lock(&vfsmount_lock);
+ br_read_lock(vfsmount_lock);
if (propagate_mount_busy(mnt, 2))
ret = 0;
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
up_read(&namespace_sem);
return ret;
}
@@ -1003,13 +1037,14 @@ void release_mounts(struct list_head *head)
if (mnt->mnt_parent != mnt) {
struct dentry *dentry;
struct vfsmount *m;
- spin_lock(&vfsmount_lock);
+
+ br_write_lock(vfsmount_lock);
dentry = mnt->mnt_mountpoint;
m = mnt->mnt_parent;
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
m->mnt_ghosts--;
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
dput(dentry);
mntput(m);
}
@@ -1017,6 +1052,10 @@ void release_mounts(struct list_head *head)
}
}
+/*
+ * vfsmount lock must be held for write
+ * namespace_sem must be held for write
+ */
void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
{
struct vfsmount *p;
@@ -1107,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
}
down_write(&namespace_sem);
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
event++;
if (!(flags & MNT_DETACH))
@@ -1119,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
umount_tree(mnt, 1, &umount_list);
retval = 0;
}
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umount_list);
return retval;
@@ -1231,19 +1270,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
q = clone_mnt(p, p->mnt_root, flag);
if (!q)
goto Enomem;
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
list_add_tail(&q->mnt_list, &res->mnt_list);
attach_mnt(q, &path);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
}
}
return res;
Enomem:
if (res) {
LIST_HEAD(umount_list);
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
umount_tree(res, 0, &umount_list);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
release_mounts(&umount_list);
}
return NULL;
@@ -1262,9 +1301,9 @@ void drop_collected_mounts(struct vfsmount *mnt)
{
LIST_HEAD(umount_list);
down_write(&namespace_sem);
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
umount_tree(mnt, 0, &umount_list);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umount_list);
}
@@ -1392,7 +1431,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
if (err)
goto out_cleanup_ids;
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
if (IS_MNT_SHARED(dest_mnt)) {
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1411,7 +1450,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
list_del_init(&child->mnt_hash);
commit_tree(child);
}
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
+
return 0;
out_cleanup_ids:
@@ -1466,10 +1506,10 @@ static int do_change_type(struct path *path, int flag)
goto out_unlock;
}
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
change_mnt_propagation(m, type);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
out_unlock:
up_write(&namespace_sem);
@@ -1513,9 +1553,10 @@ static int do_loopback(struct path *path, char *old_name,
err = graft_tree(mnt, path);
if (err) {
LIST_HEAD(umount_list);
- spin_lock(&vfsmount_lock);
+
+ br_write_lock(vfsmount_lock);
umount_tree(mnt, 0, &umount_list);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
release_mounts(&umount_list);
}
@@ -1568,16 +1609,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
else
err = do_remount_sb(sb, flags, data, 0);
if (!err) {
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK;
path->mnt->mnt_flags = mnt_flags;
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
}
up_write(&sb->s_umount);
if (!err) {
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
touch_mnt_namespace(path->mnt->mnt_ns);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
}
return err;
}
@@ -1754,7 +1795,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
return;
down_write(&namespace_sem);
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
/* extract from the expiration list every vfsmount that matches the
* following criteria:
@@ -1773,7 +1814,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
touch_mnt_namespace(mnt->mnt_ns);
umount_tree(mnt, 1, &umounts);
}
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umounts);
@@ -1830,6 +1871,8 @@ resume:
/*
* process a list of expirable mountpoints with the intent of discarding any
* submounts of a specific parent mountpoint
+ *
+ * vfsmount_lock must be held for write
*/
static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
{
@@ -2048,9 +2091,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
kfree(new_ns);
return ERR_PTR(-ENOMEM);
}
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
/*
* Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -2244,7 +2287,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out2; /* not attached */
/* make sure we can reach put_old from new_root */
tmp = old.mnt;
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
if (tmp != new.mnt) {
for (;;) {
if (tmp->mnt_parent == tmp)
@@ -2264,7 +2307,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* mount new_root on / */
attach_mnt(new.mnt, &root_parent);
touch_mnt_namespace(current->nsproxy->mnt_ns);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
chroot_fs_refs(&root, &new);
error = 0;
path_put(&root_parent);
@@ -2279,7 +2322,7 @@ out1:
out0:
return error;
out3:
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
goto out2;
}
@@ -2326,6 +2369,8 @@ void __init mnt_init(void)
for (u = 0; u < HASH_SIZE; u++)
INIT_LIST_HEAD(&mount_hashtable[u]);
+ br_lock_init(vfsmount_lock);
+
err = sysfs_init();
if (err)
printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2344,9 +2389,9 @@ void put_mnt_ns(struct mnt_namespace *ns)
if (!atomic_dec_and_test(&ns->count))
return;
down_write(&namespace_sem);
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
umount_tree(ns->root, 0, &umount_list);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umount_list);
kfree(ns);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 26a510a..6c2aad4 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -63,7 +63,6 @@ config NFS_V3_ACL
config NFS_V4
bool "NFS client support for NFS version 4"
depends on NFS_FS
- select RPCSEC_GSS_KRB5
help
This option enables support for version 4 of the NFS protocol
(RFC 3530) in the kernel's NFS client.
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 29539ce..e257172 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -140,6 +140,13 @@ nfs_opendir(struct inode *inode, struct file *filp)
/* Call generic open code in order to cache credentials */
res = nfs_open(inode, filp);
+ if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
+ /* This is a mountpoint, so d_revalidate will never
+ * have been called, so we need to refresh the
+ * inode (for close-open consistency) ourselves.
+ */
+ __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ }
return res;
}
@@ -1103,7 +1110,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
goto no_open_dput;
/* We can't create new files, or truncate existing ones here */
- openflags &= ~(O_CREAT|O_TRUNC);
+ openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
/*
* Note: we're not holding inode->i_mutex and so may be racing with
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2d141a7..eb51bd6 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -323,7 +323,7 @@ nfs_file_fsync(struct file *file, int datasync)
have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
if (have_error)
ret = xchg(&ctx->error, 0);
- if (!ret)
+ if (!ret && status < 0)
ret = status;
return ret;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7ffbb98..089da5b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2036,7 +2036,8 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
struct rpc_cred *cred;
struct nfs4_state *state;
struct dentry *res;
- fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
+ int open_flags = nd->intent.open.flags;
+ fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
if (nd->flags & LOOKUP_CREATE) {
attr.ia_mode = nd->intent.open.create_mode;
@@ -2044,8 +2045,9 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
if (!IS_POSIXACL(dir))
attr.ia_mode &= ~current_umask();
} else {
+ open_flags &= ~O_EXCL;
attr.ia_valid = 0;
- BUG_ON(nd->intent.open.flags & O_CREAT);
+ BUG_ON(open_flags & O_CREAT);
}
cred = rpc_lookup_cred();
@@ -2054,7 +2056,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
parent = dentry->d_parent;
/* Protect against concurrent sillydeletes */
nfs_block_sillyrename(parent);
- state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred);
+ state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred);
put_rpccred(cred);
if (IS_ERR(state)) {
if (PTR_ERR(state) == -ENOENT) {
@@ -2273,8 +2275,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct
out:
if (page)
__free_page(page);
- if (locations)
- kfree(locations);
+ kfree(locations);
return status;
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ee26316..ec3966e 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -655,6 +655,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
if (nfss->options & NFS_OPTION_FSCACHE)
seq_printf(m, ",fsc");
+
+ if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) {
+ if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
+ seq_printf(m, ",lookupcache=none");
+ else
+ seq_printf(m, ",lookupcache=pos");
+ }
}
/*
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 503b9da..95932f5 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -69,7 +69,6 @@ config NFSD_V4
depends on NFSD && PROC_FS && EXPERIMENTAL
select NFSD_V3
select FS_POSIX_ACL
- select RPCSEC_GSS_KRB5
help
This option enables support in your system's NFS server for
version 4 of the NFS protocol (RFC 3530).
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1fa86b9..9222633 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
{
struct the_nilfs *nilfs = sbi->s_nilfs;
int err;
- int barrier_done = 0;
- if (nilfs_test_opt(sbi, BARRIER)) {
- set_buffer_ordered(nilfs->ns_sbh[0]);
- barrier_done = 1;
- }
retry:
set_buffer_dirty(nilfs->ns_sbh[0]);
- err = sync_dirty_buffer(nilfs->ns_sbh[0]);
- if (err == -EOPNOTSUPP && barrier_done) {
- nilfs_warning(sbi->s_super, __func__,
- "barrier-based sync failed. "
- "disabling barriers\n");
- nilfs_clear_opt(sbi, BARRIER);
- barrier_done = 0;
- clear_buffer_ordered(nilfs->ns_sbh[0]);
- goto retry;
+
+ if (nilfs_test_opt(sbi, BARRIER)) {
+ err = __sync_dirty_buffer(nilfs->ns_sbh[0],
+ WRITE_SYNC | WRITE_BARRIER);
+ if (err == -EOPNOTSUPP) {
+ nilfs_warning(sbi->s_super, __func__,
+ "barrier-based sync failed. "
+ "disabling barriers\n");
+ nilfs_clear_opt(sbi, BARRIER);
+ goto retry;
+ }
+ } else {
+ err = sync_dirty_buffer(nilfs->ns_sbh[0]);
}
+
if (unlikely(err)) {
printk(KERN_ERR
"NILFS: unable to write superblock (err=%d)\n", err);
@@ -400,9 +400,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
list_add(&sbi->s_list, &nilfs->ns_supers);
up_write(&nilfs->ns_super_sem);
+ err = -ENOMEM;
sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size);
if (!sbi->s_ifile)
- return -ENOMEM;
+ goto delist;
down_read(&nilfs->ns_segctor_sem);
err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp,
@@ -433,6 +434,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
nilfs_mdt_destroy(sbi->s_ifile);
sbi->s_ifile = NULL;
+ delist:
down_write(&nilfs->ns_super_sem);
list_del_init(&sbi->s_list);
up_write(&nilfs->ns_super_sem);
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 37de1f0..6af1c00 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -608,11 +608,11 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
return -EINVAL;
}
- if (swp) {
+ if (!valid[!swp])
printk(KERN_WARNING "NILFS warning: broken superblock. "
"using spare superblock.\n");
+ if (swp)
nilfs_swap_super_block(nilfs);
- }
nilfs->ns_sbwcount = 0;
nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime);
diff --git a/fs/open.c b/fs/open.c
index 630715f..d74e198 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
f->f_path.mnt = mnt;
f->f_pos = 0;
f->f_op = fops_get(inode->i_fop);
- file_move(f, &inode->i_sb->s_files);
+ file_sb_list_add(f, inode->i_sb);
error = security_dentry_open(f, cred);
if (error)
@@ -721,7 +721,7 @@ cleanup_all:
mnt_drop_write(mnt);
}
}
- file_kill(f);
+ file_sb_list_del(f);
f->f_path.dentry = NULL;
f->f_path.mnt = NULL;
cleanup_file:
diff --git a/fs/pnode.c b/fs/pnode.c
index 5cc564a..8066b8d 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -126,6 +126,9 @@ static int do_make_slave(struct vfsmount *mnt)
return 0;
}
+/*
+ * vfsmount lock must be held for write
+ */
void change_mnt_propagation(struct vfsmount *mnt, int type)
{
if (type == MS_SHARED) {
@@ -270,12 +273,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
prev_src_mnt = child;
}
out:
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
while (!list_empty(&tmp_list)) {
child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash);
umount_tree(child, 0, &umount_list);
}
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
release_mounts(&umount_list);
return ret;
}
@@ -296,6 +299,8 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
* other mounts its parent propagates to.
* Check if any of these mounts that **do not have submounts**
* have more references than 'refcnt'. If so return busy.
+ *
+ * vfsmount lock must be held for read or write
*/
int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
{
@@ -353,6 +358,8 @@ static void __propagate_umount(struct vfsmount *mnt)
* collect all mounts that receive propagation from the mount in @list,
* and return these additional mounts in the same list.
* @list: the list of mounts to be unmounted.
+ *
+ * vfsmount lock must be held for write
*/
int propagate_umount(struct list_head *list)
{
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ae35413..caa7583 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -83,6 +83,7 @@ void reiserfs_evict_inode(struct inode *inode)
dquot_drop(inode);
inode->i_blocks = 0;
reiserfs_write_unlock_once(inode->i_sb, depth);
+ return;
no_delete:
end_writeback(inode);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 1ec952b..812e2c0 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb,
/* flush out the real blocks */
for (i = 0; i < get_desc_trans_len(desc); i++) {
set_buffer_dirty(real_blocks[i]);
- ll_rw_block(SWRITE, 1, real_blocks + i);
+ write_dirty_buffer(real_blocks[i], WRITE);
}
for (i = 0; i < get_desc_trans_len(desc); i++) {
wait_on_buffer(real_blocks[i]);
diff --git a/fs/super.c b/fs/super.c
index 9674ab2..8819e3a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type)
s = NULL;
goto out;
}
+#ifdef CONFIG_SMP
+ s->s_files = alloc_percpu(struct list_head);
+ if (!s->s_files) {
+ security_sb_free(s);
+ kfree(s);
+ s = NULL;
+ goto out;
+ } else {
+ int i;
+
+ for_each_possible_cpu(i)
+ INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
+ }
+#else
INIT_LIST_HEAD(&s->s_files);
+#endif
INIT_LIST_HEAD(&s->s_instances);
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
@@ -108,6 +123,9 @@ out:
*/
static inline void destroy_super(struct super_block *s)
{
+#ifdef CONFIG_SMP
+ free_percpu(s->s_files);
+#endif
security_sb_free(s);
kfree(s->s_subtype);
kfree(s->s_options);
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 048484f..46f7a80 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer (UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
sb->s_dirt = 1;
unlock_super (sb);
@@ -207,10 +205,8 @@ do_more:
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer (UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
if (overflow) {
fragment += count;
@@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer (UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
sb->s_dirt = 1;
UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment);
@@ -680,10 +674,8 @@ cg_found:
succed:
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer (UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
sb->s_dirt = 1;
result += cgno * uspi->s_fpg;
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 428017e..2eabf04 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode)
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer (UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
sb->s_dirt = 1;
unlock_super (sb);
@@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb,
fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb);
ubh_mark_buffer_dirty(UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer(UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
UFSD("EXIT\n");
}
@@ -290,10 +286,8 @@ cg_found:
}
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer (UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
sb->s_dirt = 1;
inode->i_ino = cg * uspi->s_ipg + bit;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 34d5cb1..a58f915 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
ubh_bforget(ind_ubh);
ind_ubh = NULL;
}
- if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) {
- ubh_ll_rw_block(SWRITE, ind_ubh);
- ubh_wait_on_buffer (ind_ubh);
- }
+ if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh))
+ ubh_sync_block(ind_ubh);
ubh_brelse (ind_ubh);
UFSD("EXIT: ino %lu\n", inode->i_ino);
@@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
ubh_bforget(dind_bh);
dind_bh = NULL;
}
- if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) {
- ubh_ll_rw_block(SWRITE, dind_bh);
- ubh_wait_on_buffer (dind_bh);
- }
+ if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh))
+ ubh_sync_block(dind_bh);
ubh_brelse (dind_bh);
UFSD("EXIT: ino %lu\n", inode->i_ino);
@@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode)
ubh_bforget(tind_bh);
tind_bh = NULL;
}
- if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) {
- ubh_ll_rw_block(SWRITE, tind_bh);
- ubh_wait_on_buffer (tind_bh);
- }
+ if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh))
+ ubh_sync_block(tind_bh);
ubh_brelse (tind_bh);
UFSD("EXIT: ino %lu\n", inode->i_ino);
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index 85a7fc9..d2c36d5 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag)
}
}
-void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh)
+void ubh_sync_block(struct ufs_buffer_head *ubh)
{
- if (!ubh)
- return;
+ if (ubh) {
+ unsigned i;
- ll_rw_block(rw, ubh->count, ubh->bh);
-}
+ for (i = 0; i < ubh->count; i++)
+ write_dirty_buffer(ubh->bh[i], WRITE);
-void ubh_wait_on_buffer (struct ufs_buffer_head * ubh)
-{
- unsigned i;
- if (!ubh)
- return;
- for ( i = 0; i < ubh->count; i++ )
- wait_on_buffer (ubh->bh[i]);
+ for (i = 0; i < ubh->count; i++)
+ wait_on_buffer(ubh->bh[i]);
+ }
}
void ubh_bforget (struct ufs_buffer_head * ubh)
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 0466036..9f8775c 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *);
extern void ubh_brelse_uspi (struct ufs_sb_private_info *);
extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *);
extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int);
-extern void ubh_ll_rw_block(int, struct ufs_buffer_head *);
-extern void ubh_wait_on_buffer (struct ufs_buffer_head *);
+extern void ubh_sync_block(struct ufs_buffer_head *);
extern void ubh_bforget (struct ufs_buffer_head *);
extern int ubh_buffer_dirty (struct ufs_buffer_head *);
#define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size)
diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h
index df84e3b..d89dec8 100644
--- a/include/asm-generic/syscalls.h
+++ b/include/asm-generic/syscalls.h
@@ -23,8 +23,10 @@ asmlinkage long sys_vfork(struct pt_regs *regs);
#endif
#ifndef sys_execve
-asmlinkage long sys_execve(char __user *filename, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs *regs);
+asmlinkage long sys_execve(const char __user *filename,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp,
+ struct pt_regs *regs);
#endif
#ifndef sys_mmap2
diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h
index ca16c38..be33b3a 100644
--- a/include/linux/amba/clcd.h
+++ b/include/linux/amba/clcd.h
@@ -150,6 +150,7 @@ struct clcd_fb {
u16 off_cntl;
u32 clcd_cntl;
u32 cmap[16];
+ bool clk_enabled;
};
static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs)
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index c809e28..a065612 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -50,8 +50,8 @@ struct linux_binprm{
int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */
unsigned int per_clear; /* bits to clear in current->personality */
int argc, envc;
- char * filename; /* Name of binary as seen by procps */
- char * interp; /* Name of the binary really executed. Most
+ const char * filename; /* Name of binary as seen by procps */
+ const char * interp; /* Name of the binary really executed. Most
of the time same as filename, but could be
different for binfmt_{misc,script} */
unsigned interp_flags;
@@ -126,7 +126,8 @@ extern int setup_arg_pages(struct linux_binprm * bprm,
unsigned long stack_top,
int executable_stack);
extern int bprm_mm_init(struct linux_binprm *bprm);
-extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm);
+extern int copy_strings_kernel(int argc, const char *const *argv,
+ struct linux_binprm *bprm);
extern int prepare_bprm_creds(struct linux_binprm *bprm);
extern void install_exec_creds(struct linux_binprm *bprm);
extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 43e649a..ec94c12 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -32,7 +32,6 @@ enum bh_state_bits {
BH_Delay, /* Buffer is not yet allocated on disk */
BH_Boundary, /* Block is followed by a discontiguity */
BH_Write_EIO, /* I/O error on write */
- BH_Ordered, /* ordered write */
BH_Eopnotsupp, /* operation not supported (barrier) */
BH_Unwritten, /* Buffer is allocated on disk but not written */
BH_Quiet, /* Buffer Error Prinks to be quiet */
@@ -125,7 +124,6 @@ BUFFER_FNS(Async_Write, async_write)
BUFFER_FNS(Delay, delay)
BUFFER_FNS(Boundary, boundary)
BUFFER_FNS(Write_EIO, write_io_error)
-BUFFER_FNS(Ordered, ordered)
BUFFER_FNS(Eopnotsupp, eopnotsupp)
BUFFER_FNS(Unwritten, unwritten)
@@ -183,6 +181,8 @@ void unlock_buffer(struct buffer_head *bh);
void __lock_buffer(struct buffer_head *bh);
void ll_rw_block(int, int, struct buffer_head * bh[]);
int sync_dirty_buffer(struct buffer_head *bh);
+int __sync_dirty_buffer(struct buffer_head *bh, int rw);
+void write_dirty_buffer(struct buffer_head *bh, int rw);
int submit_bh(int, struct buffer_head *);
void write_boundary_block(struct block_device *bdev,
sector_t bblock, unsigned blocksize);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index ed3e92e..3cb7d04 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -75,7 +75,7 @@ struct cgroup_subsys_state {
unsigned long flags;
/* ID for this css, if possible */
- struct css_id *id;
+ struct css_id __rcu *id;
};
/* bits in struct cgroup_subsys_state flags field */
@@ -205,7 +205,7 @@ struct cgroup {
struct list_head children; /* my children */
struct cgroup *parent; /* my parent */
- struct dentry *dentry; /* cgroup fs entry, RCU protected */
+ struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */
/* Private pointers for each registered subsystem */
struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index c1a62c5..320d6c9 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -16,7 +16,11 @@
# define __release(x) __context__(x,-1)
# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0)
# define __percpu __attribute__((noderef, address_space(3)))
+#ifdef CONFIG_SPARSE_RCU_POINTER
+# define __rcu __attribute__((noderef, address_space(4)))
+#else
# define __rcu
+#endif
extern void __chk_user_ptr(const volatile void __user *);
extern void __chk_io_ptr(const volatile void __iomem *);
#else
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 4d2c395..4aaeab3 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -84,7 +84,7 @@ struct thread_group_cred {
atomic_t usage;
pid_t tgid; /* thread group process ID */
spinlock_t lock;
- struct key *session_keyring; /* keyring inherited over fork */
+ struct key __rcu *session_keyring; /* keyring inherited over fork */
struct key *process_keyring; /* keyring private to this process */
struct rcu_head rcu; /* RCU deletion hook */
};
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index f59ed29..133c0ba 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -31,7 +31,7 @@ struct embedded_fd_set {
struct fdtable {
unsigned int max_fds;
- struct file ** fd; /* current fd array */
+ struct file __rcu **fd; /* current fd array */
fd_set *close_on_exec;
fd_set *open_fds;
struct rcu_head rcu;
@@ -46,7 +46,7 @@ struct files_struct {
* read mostly part
*/
atomic_t count;
- struct fdtable *fdt;
+ struct fdtable __rcu *fdt;
struct fdtable fdtab;
/*
* written part on a separate cache line in SMP
@@ -55,7 +55,7 @@ struct files_struct {
int next_fd;
struct embedded_fd_set close_on_exec_init;
struct embedded_fd_set open_fds_init;
- struct file * fd_array[NR_OPEN_DEFAULT];
+ struct file __rcu * fd_array[NR_OPEN_DEFAULT];
};
#define rcu_dereference_check_fdtable(files, fdtfd) \
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9a96b4d..aa3dc8d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -125,9 +125,6 @@ struct inodes_stat_t {
* block layer could (in theory) choose to ignore this
* request if it runs into resource problems.
* WRITE A normal async write. Device will be plugged.
- * SWRITE Like WRITE, but a special case for ll_rw_block() that
- * tells it to lock the buffer first. Normally a buffer
- * must be locked before doing IO.
* WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down
* the hint that someone will be waiting on this IO
* shortly. The device must still be unplugged explicitly,
@@ -138,9 +135,6 @@ struct inodes_stat_t {
* immediately after submission. The write equivalent
* of READ_SYNC.
* WRITE_ODIRECT_PLUG Special case write for O_DIRECT only.
- * SWRITE_SYNC
- * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer.
- * See SWRITE.
* WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all
* previously submitted writes must be safely on storage
* before this one is started. Also guarantees that when
@@ -155,7 +149,6 @@ struct inodes_stat_t {
#define READ 0
#define WRITE RW_MASK
#define READA RWA_MASK
-#define SWRITE (WRITE | READA)
#define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG)
#define READ_META (READ | REQ_META)
@@ -165,8 +158,6 @@ struct inodes_stat_t {
#define WRITE_META (WRITE | REQ_META)
#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
REQ_HARDBARRIER)
-#define SWRITE_SYNC_PLUG (SWRITE | REQ_SYNC | REQ_NOIDLE)
-#define SWRITE_SYNC (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
/*
* These aren't really reads or writes, they pass down information about
@@ -929,6 +920,9 @@ struct file {
#define f_vfsmnt f_path.mnt
const struct file_operations *f_op;
spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */
+#ifdef CONFIG_SMP
+ int f_sb_list_cpu;
+#endif
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
@@ -953,9 +947,6 @@ struct file {
unsigned long f_mnt_write_state;
#endif
};
-extern spinlock_t files_lock;
-#define file_list_lock() spin_lock(&files_lock);
-#define file_list_unlock() spin_unlock(&files_lock);
#define get_file(x) atomic_long_inc(&(x)->f_count)
#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1)
@@ -1346,7 +1337,11 @@ struct super_block {
struct list_head s_inodes; /* all inodes */
struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */
+#ifdef CONFIG_SMP
+ struct list_head __percpu *s_files;
+#else
struct list_head s_files;
+#endif
/* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */
struct list_head s_dentry_lru; /* unused dentry lru */
int s_nr_dentry_unused; /* # of dentry on lru */
@@ -1385,7 +1380,7 @@ struct super_block {
* Saved mount options for lazy filesystems using
* generic_show_options()
*/
- char *s_options;
+ char __rcu *s_options;
};
extern struct timespec current_fs_time(struct super_block *sb);
@@ -2197,8 +2192,6 @@ static inline void insert_inode_hash(struct inode *inode) {
__insert_inode_hash(inode, inode->i_ino);
}
-extern void file_move(struct file *f, struct list_head *list);
-extern void file_kill(struct file *f);
#ifdef CONFIG_BLOCK
extern void submit_bio(int, struct bio *);
extern int bdev_read_only(struct block_device *);
diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h
index eca3d52..a42b5bf 100644
--- a/include/linux/fs_struct.h
+++ b/include/linux/fs_struct.h
@@ -5,7 +5,7 @@
struct fs_struct {
int users;
- rwlock_t lock;
+ spinlock_t lock;
int umask;
int in_exec;
struct path root, pwd;
@@ -23,29 +23,29 @@ extern int unshare_fs_struct(void);
static inline void get_fs_root(struct fs_struct *fs, struct path *root)
{
- read_lock(&fs->lock);
+ spin_lock(&fs->lock);
*root = fs->root;
path_get(root);
- read_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
}
static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd)
{
- read_lock(&fs->lock);
+ spin_lock(&fs->lock);
*pwd = fs->pwd;
path_get(pwd);
- read_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
}
static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root,
struct path *pwd)
{
- read_lock(&fs->lock);
+ spin_lock(&fs->lock);
*root = fs->root;
path_get(root);
*pwd = fs->pwd;
path_get(pwd);
- read_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
}
#endif /* _LINUX_FS_STRUCT_H */
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 5f2f4c4..af3f06b 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -129,8 +129,8 @@ struct blk_scsi_cmd_filter {
struct disk_part_tbl {
struct rcu_head rcu_head;
int len;
- struct hd_struct *last_lookup;
- struct hd_struct *part[];
+ struct hd_struct __rcu *last_lookup;
+ struct hd_struct __rcu *part[];
};
struct gendisk {
@@ -149,7 +149,7 @@ struct gendisk {
* non-critical accesses use RCU. Always access through
* helpers.
*/
- struct disk_part_tbl *part_tbl;
+ struct disk_part_tbl __rcu *part_tbl;
struct hd_struct part0;
const struct block_device_operations *fops;
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index d5b3876..1f4517d 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -139,7 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk)
#endif
#if defined(CONFIG_NO_HZ)
-#if defined(CONFIG_TINY_RCU)
+#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
extern void rcu_enter_nohz(void);
extern void rcu_exit_nohz(void);
diff --git a/include/linux/idr.h b/include/linux/idr.h
index e968db7..cdb715e 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -50,14 +50,14 @@
struct idr_layer {
unsigned long bitmap; /* A zero bit means "space here" */
- struct idr_layer *ary[1<
+#include
+#include
+
+/* can make br locks by using local lock for read side, global lock for write */
+#define br_lock_init(name) name##_lock_init()
+#define br_read_lock(name) name##_local_lock()
+#define br_read_unlock(name) name##_local_unlock()
+#define br_write_lock(name) name##_global_lock_online()
+#define br_write_unlock(name) name##_global_unlock_online()
+
+#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name)
+#define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name)
+
+
+#define lg_lock_init(name) name##_lock_init()
+#define lg_local_lock(name) name##_local_lock()
+#define lg_local_unlock(name) name##_local_unlock()
+#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu)
+#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu)
+#define lg_global_lock(name) name##_global_lock()
+#define lg_global_unlock(name) name##_global_unlock()
+#define lg_global_lock_online(name) name##_global_lock_online()
+#define lg_global_unlock_online(name) name##_global_unlock_online()
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define LOCKDEP_INIT_MAP lockdep_init_map
+
+#define DEFINE_LGLOCK_LOCKDEP(name) \
+ struct lock_class_key name##_lock_key; \
+ struct lockdep_map name##_lock_dep_map; \
+ EXPORT_SYMBOL(name##_lock_dep_map)
+
+#else
+#define LOCKDEP_INIT_MAP(a, b, c, d)
+
+#define DEFINE_LGLOCK_LOCKDEP(name)
+#endif
+
+
+#define DECLARE_LGLOCK(name) \
+ extern void name##_lock_init(void); \
+ extern void name##_local_lock(void); \
+ extern void name##_local_unlock(void); \
+ extern void name##_local_lock_cpu(int cpu); \
+ extern void name##_local_unlock_cpu(int cpu); \
+ extern void name##_global_lock(void); \
+ extern void name##_global_unlock(void); \
+ extern void name##_global_lock_online(void); \
+ extern void name##_global_unlock_online(void); \
+
+#define DEFINE_LGLOCK(name) \
+ \
+ DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \
+ DEFINE_LGLOCK_LOCKDEP(name); \
+ \
+ void name##_lock_init(void) { \
+ int i; \
+ LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
+ for_each_possible_cpu(i) { \
+ arch_spinlock_t *lock; \
+ lock = &per_cpu(name##_lock, i); \
+ *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \
+ } \
+ } \
+ EXPORT_SYMBOL(name##_lock_init); \
+ \
+ void name##_local_lock(void) { \
+ arch_spinlock_t *lock; \
+ preempt_disable(); \
+ rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \
+ lock = &__get_cpu_var(name##_lock); \
+ arch_spin_lock(lock); \
+ } \
+ EXPORT_SYMBOL(name##_local_lock); \
+ \
+ void name##_local_unlock(void) { \
+ arch_spinlock_t *lock; \
+ rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \
+ lock = &__get_cpu_var(name##_lock); \
+ arch_spin_unlock(lock); \
+ preempt_enable(); \
+ } \
+ EXPORT_SYMBOL(name##_local_unlock); \
+ \
+ void name##_local_lock_cpu(int cpu) { \
+ arch_spinlock_t *lock; \
+ preempt_disable(); \
+ rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \
+ lock = &per_cpu(name##_lock, cpu); \
+ arch_spin_lock(lock); \
+ } \
+ EXPORT_SYMBOL(name##_local_lock_cpu); \
+ \
+ void name##_local_unlock_cpu(int cpu) { \
+ arch_spinlock_t *lock; \
+ rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \
+ lock = &per_cpu(name##_lock, cpu); \
+ arch_spin_unlock(lock); \
+ preempt_enable(); \
+ } \
+ EXPORT_SYMBOL(name##_local_unlock_cpu); \
+ \
+ void name##_global_lock_online(void) { \
+ int i; \
+ preempt_disable(); \
+ rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \
+ for_each_online_cpu(i) { \
+ arch_spinlock_t *lock; \
+ lock = &per_cpu(name##_lock, i); \
+ arch_spin_lock(lock); \
+ } \
+ } \
+ EXPORT_SYMBOL(name##_global_lock_online); \
+ \
+ void name##_global_unlock_online(void) { \
+ int i; \
+ rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \
+ for_each_online_cpu(i) { \
+ arch_spinlock_t *lock; \
+ lock = &per_cpu(name##_lock, i); \
+ arch_spin_unlock(lock); \
+ } \
+ preempt_enable(); \
+ } \
+ EXPORT_SYMBOL(name##_global_unlock_online); \
+ \
+ /* Full writer lock: takes the lock of every possible CPU, */ \
+ /* unlike the _online variant which only covers online CPUs. */ \
+ void name##_global_lock(void) { \
+ int i; \
+ preempt_disable(); \
+ rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \
+ for_each_possible_cpu(i) { \
+ arch_spinlock_t *lock; \
+ lock = &per_cpu(name##_lock, i); \
+ arch_spin_lock(lock); \
+ } \
+ } \
+ EXPORT_SYMBOL(name##_global_lock); \
+ \
+ /* Must mirror name##_global_lock: release every possible CPU's lock. */ \
+ void name##_global_unlock(void) { \
+ int i; \
+ rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \
+ for_each_possible_cpu(i) { \
+ arch_spinlock_t *lock; \
+ lock = &per_cpu(name##_lock, i); \
+ arch_spin_unlock(lock); \
+ } \
+ preempt_enable(); \
+ } \
+ EXPORT_SYMBOL(name##_global_unlock);
+#endif
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index b8bb9a6..cb57d65 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -134,7 +134,7 @@ struct vm_area_struct {
within vm_mm. */
/* linked list of VM areas per task, sorted by address */
- struct vm_area_struct *vm_next;
+ struct vm_area_struct *vm_next, *vm_prev;
pgprot_t vm_page_prot; /* Access permissions of this VMA. */
unsigned long vm_flags; /* Flags, see mm.h. */
@@ -299,7 +299,7 @@ struct mm_struct {
* new_owner->mm == mm
* new_owner->alloc_lock is held
*/
- struct task_struct *owner;
+ struct task_struct __rcu *owner;
#endif
#ifdef CONFIG_PROC_FS
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 508f8cf..d0edf7d 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -185,7 +185,7 @@ struct nfs_inode {
struct nfs4_cached_acl *nfs4_acl;
/* NFSv4 state */
struct list_head open_states;
- struct nfs_delegation *delegation;
+ struct nfs_delegation __rcu *delegation;
fmode_t delegation_state;
struct rw_semaphore rwsem;
#endif /* CONFIG_NFS_V4*/
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index b2f1a4d..2026f9e 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -49,28 +49,28 @@
struct notifier_block {
int (*notifier_call)(struct notifier_block *, unsigned long, void *);
- struct notifier_block *next;
+ struct notifier_block __rcu *next;
int priority;
};
struct atomic_notifier_head {
spinlock_t lock;
- struct notifier_block *head;
+ struct notifier_block __rcu *head;
};
struct blocking_notifier_head {
struct rw_semaphore rwsem;
- struct notifier_block *head;
+ struct notifier_block __rcu *head;
};
struct raw_notifier_head {
- struct notifier_block *head;
+ struct notifier_block __rcu *head;
};
struct srcu_notifier_head {
struct mutex mutex;
struct srcu_struct srcu;
- struct notifier_block *head;
+ struct notifier_block __rcu *head;
};
#define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 634b8e6..a39cbed 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -47,6 +47,8 @@ static inline void *radix_tree_indirect_to_ptr(void *ptr)
{
return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR);
}
+#define radix_tree_indirect_to_ptr(ptr) \
+ radix_tree_indirect_to_ptr((void __force *)(ptr))
static inline int radix_tree_is_indirect_ptr(void *ptr)
{
@@ -61,7 +63,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr)
struct radix_tree_root {
unsigned int height;
gfp_t gfp_mask;
- struct radix_tree_node *rnode;
+ struct radix_tree_node __rcu *rnode;
};
#define RADIX_TREE_INIT(mask) { \
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 4ec3b38..f31ef61 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -10,6 +10,21 @@
#include
/*
+ * Why is there no list_empty_rcu()? Because list_empty() serves this
+ * purpose. The list_empty() function fetches the RCU-protected pointer
+ * and compares it to the address of the list head, but neither dereferences
+ * this pointer itself nor provides this pointer to the caller. Therefore,
+ * it is not necessary to use rcu_dereference(), so that list_empty() can
+ * be used anywhere you would want to use a list_empty_rcu().
+ */
+
+/*
+ * return the ->next pointer of a list_head in an rcu safe
+ * way, we must not access it directly
+ */
+#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next)))
+
+/*
* Insert a new entry between two known consecutive entries.
*
* This is only for internal list manipulation where we know
@@ -20,7 +35,7 @@ static inline void __list_add_rcu(struct list_head *new,
{
new->next = next;
new->prev = prev;
- rcu_assign_pointer(prev->next, new);
+ rcu_assign_pointer(list_next_rcu(prev), new);
next->prev = new;
}
@@ -138,7 +153,7 @@ static inline void list_replace_rcu(struct list_head *old,
{
new->next = old->next;
new->prev = old->prev;
- rcu_assign_pointer(new->prev->next, new);
+ rcu_assign_pointer(list_next_rcu(new->prev), new);
new->next->prev = new;
old->prev = LIST_POISON2;
}
@@ -193,7 +208,7 @@ static inline void list_splice_init_rcu(struct list_head *list,
*/
last->next = at;
- rcu_assign_pointer(head->next, first);
+ rcu_assign_pointer(list_next_rcu(head), first);
first->prev = head;
at->prev = last;
}
@@ -208,7 +223,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
* primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
*/
#define list_entry_rcu(ptr, type, member) \
- container_of(rcu_dereference_raw(ptr), type, member)
+ ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \
+ container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \
+ })
/**
* list_first_entry_rcu - get the first element from a list
@@ -225,9 +242,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
list_entry_rcu((ptr)->next, type, member)
#define __list_for_each_rcu(pos, head) \
- for (pos = rcu_dereference_raw((head)->next); \
+ for (pos = rcu_dereference_raw(list_next_rcu(head)); \
pos != (head); \
- pos = rcu_dereference_raw(pos->next))
+ pos = rcu_dereference_raw(list_next_rcu(pos)))
/**
* list_for_each_entry_rcu - iterate over rcu list of given type
@@ -257,9 +274,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
* as long as the traversal is guarded by rcu_read_lock().
*/
#define list_for_each_continue_rcu(pos, head) \
- for ((pos) = rcu_dereference_raw((pos)->next); \
+ for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \
prefetch((pos)->next), (pos) != (head); \
- (pos) = rcu_dereference_raw((pos)->next))
+ (pos) = rcu_dereference_raw(list_next_rcu(pos)))
/**
* list_for_each_entry_continue_rcu - continue iteration over list of given type
@@ -314,12 +331,19 @@ static inline void hlist_replace_rcu(struct hlist_node *old,
new->next = next;
new->pprev = old->pprev;
- rcu_assign_pointer(*new->pprev, new);
+ rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new);
if (next)
new->next->pprev = &new->next;
old->pprev = LIST_POISON2;
}
+/*
+ * return the first or the next element in an RCU protected hlist
+ */
+#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first)))
+#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next)))
+#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev)))
+
/**
* hlist_add_head_rcu
* @n: the element to add to the hash list.
@@ -346,7 +370,7 @@ static inline void hlist_add_head_rcu(struct hlist_node *n,
n->next = first;
n->pprev = &h->first;
- rcu_assign_pointer(h->first, n);
+ rcu_assign_pointer(hlist_first_rcu(h), n);
if (first)
first->pprev = &n->next;
}
@@ -374,7 +398,7 @@ static inline void hlist_add_before_rcu(struct hlist_node *n,
{
n->pprev = next->pprev;
n->next = next;
- rcu_assign_pointer(*(n->pprev), n);
+ rcu_assign_pointer(hlist_pprev_rcu(n), n);
next->pprev = &n->next;
}
@@ -401,15 +425,15 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
{
n->next = prev->next;
n->pprev = &prev->next;
- rcu_assign_pointer(prev->next, n);
+ rcu_assign_pointer(hlist_next_rcu(prev), n);
if (n->next)
n->next->pprev = &n->next;
}
-#define __hlist_for_each_rcu(pos, head) \
- for (pos = rcu_dereference((head)->first); \
- pos && ({ prefetch(pos->next); 1; }); \
- pos = rcu_dereference(pos->next))
+#define __hlist_for_each_rcu(pos, head) \
+ for (pos = rcu_dereference(hlist_first_rcu(head)); \
+ pos && ({ prefetch(pos->next); 1; }); \
+ pos = rcu_dereference(hlist_next_rcu(pos)))
/**
* hlist_for_each_entry_rcu - iterate over rcu list of given type
@@ -422,11 +446,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
* the _rcu list-mutation primitives such as hlist_add_head_rcu()
* as long as the traversal is guarded by rcu_read_lock().
*/
-#define hlist_for_each_entry_rcu(tpos, pos, head, member) \
- for (pos = rcu_dereference_raw((head)->first); \
+#define hlist_for_each_entry_rcu(tpos, pos, head, member) \
+ for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \
pos && ({ prefetch(pos->next); 1; }) && \
({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
- pos = rcu_dereference_raw(pos->next))
+ pos = rcu_dereference_raw(hlist_next_rcu(pos)))
/**
* hlist_for_each_entry_rcu_bh - iterate over rcu list of given type
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index b70ffe5..2ae1371 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -37,6 +37,12 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n)
}
}
+#define hlist_nulls_first_rcu(head) \
+ (*((struct hlist_nulls_node __rcu __force **)&(head)->first))
+
+#define hlist_nulls_next_rcu(node) \
+ (*((struct hlist_nulls_node __rcu __force **)&(node)->next))
+
/**
* hlist_nulls_del_rcu - deletes entry from hash list without re-initialization
* @n: the element to delete from the hash list.
@@ -88,7 +94,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
n->next = first;
n->pprev = &h->first;
- rcu_assign_pointer(h->first, n);
+ rcu_assign_pointer(hlist_nulls_first_rcu(h), n);
if (!is_a_nulls(first))
first->pprev = &n->next;
}
@@ -100,11 +106,11 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
* @member: the name of the hlist_nulls_node within the struct.
*
*/
-#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
- for (pos = rcu_dereference_raw((head)->first); \
- (!is_a_nulls(pos)) && \
+#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \
+ for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \
+ (!is_a_nulls(pos)) && \
({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
- pos = rcu_dereference_raw(pos->next))
+ pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
#endif
#endif
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 9fbc54a..42cd6bc 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -41,11 +41,15 @@
#include
#include
#include
+#include
#ifdef CONFIG_RCU_TORTURE_TEST
extern int rcutorture_runnable; /* for sysctl */
#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
+#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
+#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
+
/**
* struct rcu_head - callback structure for use with RCU
* @next: next update requests in a list
@@ -57,29 +61,98 @@ struct rcu_head {
};
/* Exported common interfaces */
-extern void rcu_barrier(void);
+extern void call_rcu_sched(struct rcu_head *head,
+ void (*func)(struct rcu_head *rcu));
+extern void synchronize_sched(void);
extern void rcu_barrier_bh(void);
extern void rcu_barrier_sched(void);
extern void synchronize_sched_expedited(void);
extern int sched_expedited_torture_stats(char *page);
+static inline void __rcu_read_lock_bh(void)
+{
+ local_bh_disable();
+}
+
+static inline void __rcu_read_unlock_bh(void)
+{
+ local_bh_enable();
+}
+
+#ifdef CONFIG_PREEMPT_RCU
+
+extern void __rcu_read_lock(void);
+extern void __rcu_read_unlock(void);
+void synchronize_rcu(void);
+
+/*
+ * Defined as a macro as it is a very low level header included from
+ * areas that don't even know about current. This gives the rcu_read_lock()
+ * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other
+ * types of kernel builds, the rcu_read_lock() nesting depth is unknowable.
+ */
+#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
+
+#else /* #ifdef CONFIG_PREEMPT_RCU */
+
+static inline void __rcu_read_lock(void)
+{
+ preempt_disable();
+}
+
+static inline void __rcu_read_unlock(void)
+{
+ preempt_enable();
+}
+
+static inline void synchronize_rcu(void)
+{
+ synchronize_sched();
+}
+
+static inline int rcu_preempt_depth(void)
+{
+ return 0;
+}
+
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+
/* Internal to kernel */
extern void rcu_init(void);
+extern void rcu_sched_qs(int cpu);
+extern void rcu_bh_qs(int cpu);
+extern void rcu_check_callbacks(int cpu, int user);
+struct notifier_block;
+
+#ifndef CONFIG_RCURING
+#ifdef CONFIG_NO_HZ
+
+extern void rcu_enter_nohz(void);
+extern void rcu_exit_nohz(void);
+
+#else /* #ifdef CONFIG_NO_HZ */
+
+static inline void rcu_enter_nohz(void)
+{
+}
+
+static inline void rcu_exit_nohz(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_NO_HZ */
+#endif
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
#include <linux/rcutree.h>
-#elif defined(CONFIG_TINY_RCU)
+#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
#include <linux/rcutiny.h>
+#elif defined(CONFIG_RCURING)
+#include <linux/rcuring.h>
#else
#error "Unknown RCU implementation specified to kernel configuration"
#endif
-#define RCU_HEAD_INIT { .next = NULL, .func = NULL }
-#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT
-#define INIT_RCU_HEAD(ptr) do { \
- (ptr)->next = NULL; (ptr)->func = NULL; \
-} while (0)
-
/*
* init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
* initialization and destruction of rcu_head on the stack. rcu_head structures
@@ -120,14 +193,15 @@ extern struct lockdep_map rcu_sched_lock_map;
extern int debug_lockdep_rcu_enabled(void);
/**
- * rcu_read_lock_held - might we be in RCU read-side critical section?
+ * rcu_read_lock_held() - might we be in RCU read-side critical section?
*
* If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
* read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
* this assumes we are in an RCU read-side critical section unless it can
- * prove otherwise.
+ * prove otherwise. This is useful for debug checks in functions that
+ * require that they be called within an RCU read-side critical section.
*
- * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
* and while lockdep is disabled.
*/
static inline int rcu_read_lock_held(void)
@@ -144,14 +218,16 @@ static inline int rcu_read_lock_held(void)
extern int rcu_read_lock_bh_held(void);
/**
- * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section?
+ * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
*
* If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
* RCU-sched read-side critical section. In absence of
* CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
* critical section unless it can prove otherwise. Note that disabling
* of preemption (including disabling irqs) counts as an RCU-sched
- * read-side critical section.
+ * read-side critical section. This is useful for debug checks in functions
+ * that require that they be called within an RCU-sched read-side
+ * critical section.
*
* Check debug_lockdep_rcu_enabled() to prevent false positives during boot
* and while lockdep is disabled.
@@ -211,7 +287,11 @@ static inline int rcu_read_lock_sched_held(void)
extern int rcu_my_thread_group_empty(void);
-#define __do_rcu_dereference_check(c) \
+/**
+ * rcu_lockdep_assert - emit lockdep splat if specified condition not met
+ * @c: condition to check
+ */
+#define rcu_lockdep_assert(c) \
do { \
static bool __warned; \
if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
@@ -220,41 +300,155 @@ extern int rcu_my_thread_group_empty(void);
} \
} while (0)
+#else /* #ifdef CONFIG_PROVE_RCU */
+
+#define rcu_lockdep_assert(c) do { } while (0)
+
+#endif /* #else #ifdef CONFIG_PROVE_RCU */
+
+/*
+ * Helper functions for rcu_dereference_check(), rcu_dereference_protected()
+ * and rcu_assign_pointer(). Some of these could be folded into their
+ * callers, but they are left separate in order to ease introduction of
+ * multiple flavors of pointers to match the multiple flavors of RCU
+ * (e.g., __rcu_bh, __rcu_sched, and __srcu), should this make sense in
+ * the future.
+ */
+#define __rcu_access_pointer(p, space) \
+ ({ \
+ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
+ (void) (((typeof (*p) space *)p) == p); \
+ ((typeof(*p) __force __kernel *)(_________p1)); \
+ })
+#define __rcu_dereference_check(p, c, space) \
+ ({ \
+ typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
+ rcu_lockdep_assert(c); \
+ (void) (((typeof (*p) space *)p) == p); \
+ smp_read_barrier_depends(); \
+ ((typeof(*p) __force __kernel *)(_________p1)); \
+ })
+#define __rcu_dereference_protected(p, c, space) \
+ ({ \
+ rcu_lockdep_assert(c); \
+ (void) (((typeof (*p) space *)p) == p); \
+ ((typeof(*p) __force __kernel *)(p)); \
+ })
+
+#define __rcu_dereference_index_check(p, c) \
+ ({ \
+ typeof(p) _________p1 = ACCESS_ONCE(p); \
+ rcu_lockdep_assert(c); \
+ smp_read_barrier_depends(); \
+ (_________p1); \
+ })
+#define __rcu_assign_pointer(p, v, space) \
+ ({ \
+ if (!__builtin_constant_p(v) || \
+ ((v) != NULL)) \
+ smp_wmb(); \
+ (p) = (typeof(*v) __force space *)(v); \
+ })
+
+
+/**
+ * rcu_access_pointer() - fetch RCU pointer with no dereferencing
+ * @p: The pointer to read
+ *
+ * Return the value of the specified RCU-protected pointer, but omit the
+ * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
+ * when the value of this pointer is accessed, but the pointer is not
+ * dereferenced, for example, when testing an RCU-protected pointer against
+ * NULL. Although rcu_access_pointer() may also be used in cases where
+ * update-side locks prevent the value of the pointer from changing, you
+ * should instead use rcu_dereference_protected() for this use case.
+ */
+#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu)
+
/**
- * rcu_dereference_check - rcu_dereference with debug checking
+ * rcu_dereference_check() - rcu_dereference with debug checking
* @p: The pointer to read, prior to dereferencing
* @c: The conditions under which the dereference will take place
*
* Do an rcu_dereference(), but check that the conditions under which the
- * dereference will take place are correct. Typically the conditions indicate
- * the various locking conditions that should be held at that point. The check
- * should return true if the conditions are satisfied.
+ * dereference will take place are correct. Typically the conditions
+ * indicate the various locking conditions that should be held at that
+ * point. The check should return true if the conditions are satisfied.
+ * An implicit check for being in an RCU read-side critical section
+ * (rcu_read_lock()) is included.
*
* For example:
*
- * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() ||
- * lockdep_is_held(&foo->lock));
+ * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock));
*
* could be used to indicate to lockdep that foo->bar may only be dereferenced
- * if either the RCU read lock is held, or that the lock required to replace
+ * if either rcu_read_lock() is held, or that the lock required to replace
* the bar struct at foo->bar is held.
*
* Note that the list of conditions may also include indications of when a lock
* need not be held, for example during initialisation or destruction of the
* target struct:
*
- * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() ||
- * lockdep_is_held(&foo->lock) ||
+ * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) ||
* atomic_read(&foo->usage) == 0);
+ *
+ * Inserts memory barriers on architectures that require them
+ * (currently only the Alpha), prevents the compiler from refetching
+ * (and from merging fetches), and, more importantly, documents exactly
+ * which pointers are protected by RCU and checks that the pointer is
+ * annotated as __rcu.
*/
#define rcu_dereference_check(p, c) \
- ({ \
- __do_rcu_dereference_check(c); \
- rcu_dereference_raw(p); \
- })
+ __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu)
/**
- * rcu_dereference_protected - fetch RCU pointer when updates prevented
+ * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking
+ * @p: The pointer to read, prior to dereferencing
+ * @c: The conditions under which the dereference will take place
+ *
+ * This is the RCU-bh counterpart to rcu_dereference_check().
+ */
+#define rcu_dereference_bh_check(p, c) \
+ __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu)
+
+/**
+ * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking
+ * @p: The pointer to read, prior to dereferencing
+ * @c: The conditions under which the dereference will take place
+ *
+ * This is the RCU-sched counterpart to rcu_dereference_check().
+ */
+#define rcu_dereference_sched_check(p, c) \
+ __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \
+ __rcu)
+
+#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/
+
+/**
+ * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
+ * @p: The pointer to read, prior to dereferencing
+ * @c: The conditions under which the dereference will take place
+ *
+ * Similar to rcu_dereference_check(), but omits the sparse checking.
+ * This allows rcu_dereference_index_check() to be used on integers,
+ * which can then be used as array indices. Attempting to use
+ * rcu_dereference_check() on an integer will give compiler warnings
+ * because the sparse address-space mechanism relies on dereferencing
+ * the RCU-protected pointer. Dereferencing integers is not something
+ * that even gcc will put up with.
+ *
+ * Note that this function does not implicitly check for RCU read-side
+ * critical sections. If this function gains lots of uses, it might
+ * make sense to provide versions for each flavor of RCU, but it does
+ * not make sense as of early 2010.
+ */
+#define rcu_dereference_index_check(p, c) \
+ __rcu_dereference_index_check((p), (c))
+
+/**
+ * rcu_dereference_protected() - fetch RCU pointer when updates prevented
+ * @p: The pointer to read, prior to dereferencing
+ * @c: The conditions under which the dereference will take place
*
* Return the value of the specified RCU-protected pointer, but omit
* both the smp_read_barrier_depends() and the ACCESS_ONCE(). This
@@ -263,35 +457,61 @@ extern int rcu_my_thread_group_empty(void);
* prevent the compiler from repeating this reference or combining it
* with other references, so it should not be used without protection
* of appropriate locks.
+ *
+ * This function is only for update-side use. Using this function
+ * when protected only by rcu_read_lock() will result in infrequent
+ * but very ugly failures.
*/
#define rcu_dereference_protected(p, c) \
- ({ \
- __do_rcu_dereference_check(c); \
- (p); \
- })
+ __rcu_dereference_protected((p), (c), __rcu)
-#else /* #ifdef CONFIG_PROVE_RCU */
+/**
+ * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented
+ * @p: The pointer to read, prior to dereferencing
+ * @c: The conditions under which the dereference will take place
+ *
+ * This is the RCU-bh counterpart to rcu_dereference_protected().
+ */
+#define rcu_dereference_bh_protected(p, c) \
+ __rcu_dereference_protected((p), (c), __rcu)
-#define rcu_dereference_check(p, c) rcu_dereference_raw(p)
-#define rcu_dereference_protected(p, c) (p)
+/**
+ * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented
+ * @p: The pointer to read, prior to dereferencing
+ * @c: The conditions under which the dereference will take place
+ *
+ * This is the RCU-sched counterpart to rcu_dereference_protected().
+ */
+#define rcu_dereference_sched_protected(p, c) \
+ __rcu_dereference_protected((p), (c), __rcu)
-#endif /* #else #ifdef CONFIG_PROVE_RCU */
/**
- * rcu_access_pointer - fetch RCU pointer with no dereferencing
+ * rcu_dereference() - fetch RCU-protected pointer for dereferencing
+ * @p: The pointer to read, prior to dereferencing
*
- * Return the value of the specified RCU-protected pointer, but omit the
- * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful
- * when the value of this pointer is accessed, but the pointer is not
- * dereferenced, for example, when testing an RCU-protected pointer against
- * NULL. This may also be used in cases where update-side locks prevent
- * the value of the pointer from changing, but rcu_dereference_protected()
- * is a lighter-weight primitive for this use case.
+ * This is a simple wrapper around rcu_dereference_check().
+ */
+#define rcu_dereference(p) rcu_dereference_check(p, 0)
+
+/**
+ * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing
+ * @p: The pointer to read, prior to dereferencing
+ *
+ * Makes rcu_dereference_bh_check() do the dirty work.
*/
-#define rcu_access_pointer(p) ACCESS_ONCE(p)
+#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0)
/**
- * rcu_read_lock - mark the beginning of an RCU read-side critical section.
+ * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing
+ * @p: The pointer to read, prior to dereferencing
+ *
+ * Makes rcu_dereference_sched_check() do the dirty work.
+ */
+#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0)
+
+/**
+ * rcu_read_lock() - mark the beginning of an RCU read-side critical section
*
* When synchronize_rcu() is invoked on one CPU while other CPUs
* are within RCU read-side critical sections, then the
@@ -302,7 +522,7 @@ extern int rcu_my_thread_group_empty(void);
* until after the all the other CPUs exit their critical sections.
*
* Note, however, that RCU callbacks are permitted to run concurrently
- * with RCU read-side critical sections. One way that this can happen
+ * with new RCU read-side critical sections. One way that this can happen
* is via the following sequence of events: (1) CPU 0 enters an RCU
* read-side critical section, (2) CPU 1 invokes call_rcu() to register
* an RCU callback, (3) CPU 0 exits the RCU read-side critical section,
@@ -317,7 +537,20 @@ extern int rcu_my_thread_group_empty(void);
* will be deferred until the outermost RCU read-side critical section
* completes.
*
- * It is illegal to block while in an RCU read-side critical section.
+ * You can avoid reading and understanding the next paragraph by
+ * following this rule: don't put anything in an rcu_read_lock() RCU
+ * read-side critical section that would block in a !PREEMPT kernel.
+ * But if you want the full story, read on!
+ *
+ * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it
+ * is illegal to block while in an RCU read-side critical section. In
+ * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU)
+ * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may
+ * be preempted, but explicit blocking is illegal. Finally, in preemptible
+ * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds,
+ * RCU read-side critical sections may be preempted and they may also
+ * block, but only when acquiring spinlocks that are subject to priority
+ * inheritance.
*/
static inline void rcu_read_lock(void)
{
@@ -337,7 +570,7 @@ static inline void rcu_read_lock(void)
*/
/**
- * rcu_read_unlock - marks the end of an RCU read-side critical section.
+ * rcu_read_unlock() - marks the end of an RCU read-side critical section.
*
* See rcu_read_lock() for more information.
*/
@@ -349,15 +582,16 @@ static inline void rcu_read_unlock(void)
}
/**
- * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section
+ * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section
*
* This is equivalent of rcu_read_lock(), but to be used when updates
- * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks
- * consider completion of a softirq handler to be a quiescent state,
- * a process in RCU read-side critical section must be protected by
- * disabling softirqs. Read-side critical sections in interrupt context
- * can use just rcu_read_lock().
- *
+ * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since
+ * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a
+ * softirq handler to be a quiescent state, a process in RCU read-side
+ * critical section must be protected by disabling softirqs. Read-side
+ * critical sections in interrupt context can use just rcu_read_lock(),
+ * though this should at least be commented to avoid confusing people
+ * reading the code.
*/
static inline void rcu_read_lock_bh(void)
{
@@ -379,13 +613,12 @@ static inline void rcu_read_unlock_bh(void)
}
/**
- * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section
+ * rcu_read_lock_sched() - mark the beginning of an RCU-sched critical section
*
- * Should be used with either
- * - synchronize_sched()
- * or
- * - call_rcu_sched() and rcu_barrier_sched()
- * on the write-side to insure proper synchronization.
+ * This is the equivalent of rcu_read_lock(), but to be used when updates
+ * are being done using call_rcu_sched() or synchronize_rcu_sched().
+ * Read-side critical sections can also be introduced by anything that
+ * disables preemption, including local_irq_disable() and friends.
*/
static inline void rcu_read_lock_sched(void)
{
@@ -420,54 +653,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
preempt_enable_notrace();
}
-
/**
- * rcu_dereference_raw - fetch an RCU-protected pointer
+ * rcu_assign_pointer() - assign to RCU-protected pointer
+ * @p: pointer to assign to
+ * @v: value to assign (publish)
*
- * The caller must be within some flavor of RCU read-side critical
- * section, or must be otherwise preventing the pointer from changing,
- * for example, by holding an appropriate lock. This pointer may later
- * be safely dereferenced. It is the caller's responsibility to have
- * done the right thing, as this primitive does no checking of any kind.
- *
- * Inserts memory barriers on architectures that require them
- * (currently only the Alpha), and, more importantly, documents
- * exactly which pointers are protected by RCU.
- */
-#define rcu_dereference_raw(p) ({ \
- typeof(p) _________p1 = ACCESS_ONCE(p); \
- smp_read_barrier_depends(); \
- (_________p1); \
- })
-
-/**
- * rcu_dereference - fetch an RCU-protected pointer, checking for RCU
- *
- * Makes rcu_dereference_check() do the dirty work.
- */
-#define rcu_dereference(p) \
- rcu_dereference_check(p, rcu_read_lock_held())
-
-/**
- * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh
- *
- * Makes rcu_dereference_check() do the dirty work.
- */
-#define rcu_dereference_bh(p) \
- rcu_dereference_check(p, rcu_read_lock_bh_held())
-
-/**
- * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched
- *
- * Makes rcu_dereference_check() do the dirty work.
- */
-#define rcu_dereference_sched(p) \
- rcu_dereference_check(p, rcu_read_lock_sched_held())
-
-/**
- * rcu_assign_pointer - assign (publicize) a pointer to a newly
- * initialized structure that will be dereferenced by RCU read-side
- * critical sections. Returns the value assigned.
+ * Assigns the specified value to the specified RCU-protected
+ * pointer, ensuring that any concurrent RCU readers will see
+ * any prior initialization. Returns the value assigned.
*
* Inserts memory barriers on architectures that require them
* (pretty much all of them other than x86), and also prevents
@@ -476,14 +669,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
* call documents which pointers will be dereferenced by RCU read-side
* code.
*/
-
#define rcu_assign_pointer(p, v) \
- ({ \
- if (!__builtin_constant_p(v) || \
- ((v) != NULL)) \
- smp_wmb(); \
- (p) = (v); \
- })
+ __rcu_assign_pointer((p), (v), __rcu)
+
+/**
+ * RCU_INIT_POINTER() - initialize an RCU protected pointer
+ *
+ * Initialize an RCU-protected pointer in such a way as to avoid RCU-lockdep
+ * splats.
+ */
+#define RCU_INIT_POINTER(p, v) \
+ p = (typeof(*v) __force __rcu *)(v)
/* Infrastructure to implement the synchronize_() primitives. */
@@ -494,26 +690,39 @@ struct rcu_synchronize {
extern void wakeme_after_rcu(struct rcu_head *head);
+#ifndef CONFIG_RCURING
+#ifdef CONFIG_PREEMPT_RCU
+
/**
- * call_rcu - Queue an RCU callback for invocation after a grace period.
+ * call_rcu() - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates.
- * @func: actual update function to be invoked after the grace period
+ * @func: actual callback function to be invoked after the grace period
*
- * The update function will be invoked some time after a full grace
- * period elapses, in other words after all currently executing RCU
- * read-side critical sections have completed. RCU read-side critical
+ * The callback function will be invoked some time after a full grace
+ * period elapses, in other words after all pre-existing RCU read-side
+ * critical sections have completed. However, the callback function
+ * might well execute concurrently with RCU read-side critical sections
+ * that started after call_rcu() was invoked. RCU read-side critical
* sections are delimited by rcu_read_lock() and rcu_read_unlock(),
* and may be nested.
*/
extern void call_rcu(struct rcu_head *head,
void (*func)(struct rcu_head *head));
+#else /* #ifdef CONFIG_PREEMPT_RCU */
+
+/* In classic RCU, call_rcu() is just call_rcu_sched(). */
+#define call_rcu call_rcu_sched
+
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+#endif
+
/**
- * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
+ * call_rcu_bh() - Queue an RCU callback for invocation after a quicker grace period.
* @head: structure to be used for queueing the RCU updates.
- * @func: actual update function to be invoked after the grace period
+ * @func: actual callback function to be invoked after the grace period
*
- * The update function will be invoked some time after a full grace
+ * The callback function will be invoked some time after a full grace
* period elapses, in other words after all currently executing RCU
* read-side critical sections have completed. call_rcu_bh() assumes
* that the read-side critical sections end on completion of a softirq
@@ -566,37 +775,4 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head)
}
#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
-#ifndef CONFIG_PROVE_RCU
-#define __do_rcu_dereference_check(c) do { } while (0)
-#endif /* #ifdef CONFIG_PROVE_RCU */
-
-#define __rcu_dereference_index_check(p, c) \
- ({ \
- typeof(p) _________p1 = ACCESS_ONCE(p); \
- __do_rcu_dereference_check(c); \
- smp_read_barrier_depends(); \
- (_________p1); \
- })
-
-/**
- * rcu_dereference_index_check() - rcu_dereference for indices with debug checking
- * @p: The pointer to read, prior to dereferencing
- * @c: The conditions under which the dereference will take place
- *
- * Similar to rcu_dereference_check(), but omits the sparse checking.
- * This allows rcu_dereference_index_check() to be used on integers,
- * which can then be used as array indices. Attempting to use
- * rcu_dereference_check() on an integer will give compiler warnings
- * because the sparse address-space mechanism relies on dereferencing
- * the RCU-protected pointer. Dereferencing integers is not something
- * that even gcc will put up with.
- *
- * Note that this function does not implicitly check for RCU read-side
- * critical sections. If this function gains lots of uses, it might
- * make sense to provide versions for each flavor of RCU, but it does
- * not make sense as of early 2010.
- */
-#define rcu_dereference_index_check(p, c) \
- __rcu_dereference_index_check((p), (c))
-
#endif /* __LINUX_RCUPDATE_H */
diff --git a/include/linux/rcuring.h b/include/linux/rcuring.h
new file mode 100644
index 0000000..343b932
--- /dev/null
+++ b/include/linux/rcuring.h
@@ -0,0 +1,195 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ * Copyright Fujitsu 2008-2010 Lai Jiangshan
+ *
+ * Authors: Dipankar Sarma
+ * Manfred Spraul
+ * Paul E. McKenney Hierarchical version
+ * Lai Jiangshan RCURING version
+ */
+
+#ifndef __LINUX_RCURING_H
+#define __LINUX_RCURING_H
+
+#define __rcu_read_lock_bh() local_bh_disable()
+#define __rcu_read_unlock_bh() local_bh_enable()
+#define __rcu_read_lock_sched() preempt_disable()
+#define __rcu_read_unlock_sched() preempt_enable()
+
+#ifdef CONFIG_RCURING_BH
+extern void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *));
+extern void synchronize_rcu_bh(void);
+extern void synchronize_rcu_bh_expedited(void);
+extern void rcu_bh_qs(int cpu);
+extern long rcu_batches_completed_bh(void);
+extern void rcu_barrier_bh(void);
+extern void rcu_bh_force_quiescent_state(void);
+#else /* CONFIG_RCURING_BH */
+#define call_rcu_bh call_rcu_sched
+#define synchronize_rcu_bh synchronize_rcu_sched
+#define synchronize_rcu_bh_expedited synchronize_rcu_sched_expedited
+#define rcu_bh_qs(cpu) do { (void)(cpu); } while (0)
+#define rcu_batches_completed_bh rcu_batches_completed_sched
+#define rcu_barrier_bh rcu_barrier_sched
+#define rcu_bh_force_quiescent_state rcu_sched_force_quiescent_state
+#endif /* CONFIG_RCURING_BH */
+
+extern
+void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *));
+extern void synchronize_rcu_sched(void);
+#define synchronize_sched synchronize_rcu_sched
+extern void synchronize_rcu_sched_expedited(void);
+#define synchronize_sched_expedited synchronize_rcu_sched_expedited
+extern void rcu_note_context_switch(int cpu);
+extern long rcu_batches_completed_sched(void);
+extern void rcu_barrier_sched(void);
+extern void rcu_sched_force_quiescent_state(void);
+
+/*
+ * The @flags is valid only when RCURING_PREEMPT_SAVED is set.
+ *
+ * When a preemptible rcu read site is preempted, we save RCURING_PREEMPT_SAVED,
+ * RCURING_PREEMPT_QS and this read site's locked_complete (only the last
+ * RCURING_COUNTERS_SHIFT bits) in the @flags.
+ *
+ * When the outermost rcu read site is closed, we will clear the
+ * RCURING_PREEMPT_QS bit if it's saved, and let the rcu system know
+ * this task has passed a quiescent state and release the old read site's
+ * locked_complete.
+ */
+#define RCURING_PREEMPT_SAVED	(1U << 31) /* @flags is valid only if set */
+#define RCURING_PREEMPT_QS	(1U << 30) /* cleared at outermost unlock */
+#define RCURING_PREEMPT_FLAGS	(RCURING_PREEMPT_SAVED | RCURING_PREEMPT_QS)
+
+struct rcu_task_preempt {
+ unsigned int nesting;
+ unsigned int flags;
+};
+
+static inline void rcu_read_lock_preempt(struct rcu_task_preempt *rcu_task)
+{
+ rcu_task->nesting++;
+ barrier();
+}
+
+/* Leave one nesting level; at the outermost level, drop a saved QS bit. */
+static inline void rcu_read_unlock_preempt(struct rcu_task_preempt *rcu_task)
+{
+	unsigned int nesting = rcu_task->nesting; /* same type as the field */
+
+	if (nesting == 1) { /* closing the outermost read site */
+		unsigned int flags;
+
+		barrier();
+		flags = ACCESS_ONCE(rcu_task->flags);
+
+		/* Write back only when both SAVED and QS are set: clear QS. */
+		if ((flags & RCURING_PREEMPT_FLAGS) == RCURING_PREEMPT_FLAGS)
+			ACCESS_ONCE(rcu_task->flags) = flags & ~RCURING_PREEMPT_QS;
+	}
+
+	barrier();
+	rcu_task->nesting = nesting - 1;
+}
+
+#ifdef CONFIG_RCURING_PREEMPT
+
+#ifdef __GENARATING_OFFSETS__
+#define task_rcu_preempt(p) ((struct rcu_task_preempt *)NULL)
+#else
+#include
+#define task_rcu_preempt(p) /* (p): expression arguments stay intact */ \
+	((struct rcu_task_preempt *)(((void *)(p)) + TASK_RCU_PREEMPT))
+#endif
+
+#define current_rcu_preempt() task_rcu_preempt(current)
+#define __rcu_read_lock() rcu_read_lock_preempt(current_rcu_preempt())
+#define __rcu_read_unlock() rcu_read_unlock_preempt(current_rcu_preempt())
+extern
+void call_rcu_preempt(struct rcu_head *head, void (*func)(struct rcu_head *));
+#define call_rcu call_rcu_preempt
+extern void synchronize_rcu_preempt(void);
+#define synchronize_rcu synchronize_rcu_preempt
+extern void synchronize_rcu_preempt_expedited(void);
+#define synchronize_rcu_expedited synchronize_rcu_preempt_expedited
+extern long rcu_batches_completed_preempt(void);
+#define rcu_batches_completed rcu_batches_completed_preempt
+extern void rcu_barrier_preempt(void);
+#define rcu_barrier rcu_barrier_preempt
+extern void rcu_preempt_force_quiescent_state(void);
+#define rcu_force_quiescent_state rcu_preempt_force_quiescent_state
+
+struct task_struct;
+static inline void rcu_copy_process(struct task_struct *p)
+{
+ struct rcu_task_preempt *rcu_task = task_rcu_preempt(p);
+
+ rcu_task->nesting = 0;
+ rcu_task->flags = 0;
+}
+
+static inline void exit_rcu(void)
+{
+ if (current_rcu_preempt()->nesting) {
+ WARN_ON(1);
+ current_rcu_preempt()->nesting = 0;
+ }
+}
+
+#else /*CONFIG_RCURING_PREEMPT */
+
+#define __rcu_read_lock() __rcu_read_lock_sched() /* no ';': safe in if/else */
+#define __rcu_read_unlock() __rcu_read_unlock_sched()
+#define call_rcu call_rcu_sched
+#define synchronize_rcu synchronize_rcu_sched
+#define synchronize_rcu_expedited synchronize_rcu_sched_expedited
+#define rcu_batches_completed rcu_batches_completed_sched
+#define rcu_barrier rcu_barrier_sched
+#define rcu_force_quiescent_state rcu_sched_force_quiescent_state
+
+static inline void rcu_copy_process(struct task_struct *p) {}
+static inline void exit_rcu(void) {}
+#endif /* CONFIG_RCURING_PREEMPT */
+
+#ifdef CONFIG_NO_HZ
+extern void rcu_kernel_enter(void);
+extern void rcu_kernel_exit(void);
+
+static inline void rcu_nmi_enter(void) { rcu_kernel_enter(); }
+static inline void rcu_nmi_exit(void) { rcu_kernel_exit(); }
+static inline void rcu_irq_enter(void) { rcu_kernel_enter(); }
+static inline void rcu_irq_exit(void) { rcu_kernel_exit(); }
+extern void rcu_enter_nohz(void);
+extern void rcu_exit_nohz(void);
+#else
+static inline void rcu_nmi_enter(void) {}
+static inline void rcu_nmi_exit(void) {}
+static inline void rcu_irq_enter(void) {}
+static inline void rcu_irq_exit(void) {}
+static inline void rcu_enter_nohz(void) {}
+static inline void rcu_exit_nohz(void) {}
+#endif
+
+extern int rcu_needs_cpu(int cpu);
+extern void rcu_check_callbacks(int cpu, int user);
+
+extern void rcu_scheduler_starting(void);
+extern int rcu_scheduler_active __read_mostly;
+
+#endif /* __LINUX_RCURING_H */
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index e2e8931..13877cb 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,103 +27,101 @@
#include
-void rcu_sched_qs(int cpu);
-void rcu_bh_qs(int cpu);
-static inline void rcu_note_context_switch(int cpu)
-{
- rcu_sched_qs(cpu);
-}
+#define rcu_init_sched() do { } while (0)
-#define __rcu_read_lock() preempt_disable()
-#define __rcu_read_unlock() preempt_enable()
-#define __rcu_read_lock_bh() local_bh_disable()
-#define __rcu_read_unlock_bh() local_bh_enable()
-#define call_rcu_sched call_rcu
+#ifdef CONFIG_TINY_RCU
-#define rcu_init_sched() do { } while (0)
-extern void rcu_check_callbacks(int cpu, int user);
+static inline void synchronize_rcu_expedited(void)
+{
+ synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */
+}
-static inline int rcu_needs_cpu(int cpu)
+static inline void rcu_barrier(void)
{
- return 0;
+ rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */
}
-/*
- * Return the number of grace periods.
- */
-static inline long rcu_batches_completed(void)
+#else /* #ifdef CONFIG_TINY_RCU */
+
+void rcu_barrier(void);
+void synchronize_rcu_expedited(void);
+
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
+static inline void synchronize_rcu_bh(void)
{
- return 0;
+ synchronize_sched();
}
-/*
- * Return the number of bottom-half grace periods.
- */
-static inline long rcu_batches_completed_bh(void)
+static inline void synchronize_rcu_bh_expedited(void)
{
- return 0;
+ synchronize_sched();
}
-static inline void rcu_force_quiescent_state(void)
+#ifdef CONFIG_TINY_RCU
+
+static inline void rcu_preempt_note_context_switch(void)
{
}
-static inline void rcu_bh_force_quiescent_state(void)
+static inline void exit_rcu(void)
{
}
-static inline void rcu_sched_force_quiescent_state(void)
+static inline int rcu_needs_cpu(int cpu)
{
+ return 0;
}
-extern void synchronize_sched(void);
+#else /* #ifdef CONFIG_TINY_RCU */
+
+void rcu_preempt_note_context_switch(void);
+extern void exit_rcu(void);
+int rcu_preempt_needs_cpu(void);
-static inline void synchronize_rcu(void)
+static inline int rcu_needs_cpu(int cpu)
{
- synchronize_sched();
+ return rcu_preempt_needs_cpu();
}
-static inline void synchronize_rcu_bh(void)
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
+static inline void rcu_note_context_switch(int cpu)
{
- synchronize_sched();
+ rcu_sched_qs(cpu);
+ rcu_preempt_note_context_switch();
}
-static inline void synchronize_rcu_expedited(void)
+/*
+ * Return the number of grace periods.
+ */
+static inline long rcu_batches_completed(void)
{
- synchronize_sched();
+ return 0;
}
-static inline void synchronize_rcu_bh_expedited(void)
+/*
+ * Return the number of bottom-half grace periods.
+ */
+static inline long rcu_batches_completed_bh(void)
{
- synchronize_sched();
+ return 0;
}
-struct notifier_block;
-
-#ifdef CONFIG_NO_HZ
-
-extern void rcu_enter_nohz(void);
-extern void rcu_exit_nohz(void);
-
-#else /* #ifdef CONFIG_NO_HZ */
-
-static inline void rcu_enter_nohz(void)
+static inline void rcu_force_quiescent_state(void)
{
}
-static inline void rcu_exit_nohz(void)
+static inline void rcu_bh_force_quiescent_state(void)
{
}
-#endif /* #else #ifdef CONFIG_NO_HZ */
-
-static inline void exit_rcu(void)
+static inline void rcu_sched_force_quiescent_state(void)
{
}
-static inline int rcu_preempt_depth(void)
+static inline void rcu_cpu_stall_reset(void)
{
- return 0;
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index c0ed1c0..95518e6 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -30,64 +30,23 @@
#ifndef __LINUX_RCUTREE_H
#define __LINUX_RCUTREE_H
-struct notifier_block;
-
-extern void rcu_sched_qs(int cpu);
-extern void rcu_bh_qs(int cpu);
extern void rcu_note_context_switch(int cpu);
extern int rcu_needs_cpu(int cpu);
+extern void rcu_cpu_stall_reset(void);
#ifdef CONFIG_TREE_PREEMPT_RCU
-extern void __rcu_read_lock(void);
-extern void __rcu_read_unlock(void);
-extern void synchronize_rcu(void);
extern void exit_rcu(void);
-/*
- * Defined as macro as it is a very low level header
- * included from areas that don't even know about current
- */
-#define rcu_preempt_depth() (current->rcu_read_lock_nesting)
-
#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-static inline void __rcu_read_lock(void)
-{
- preempt_disable();
-}
-
-static inline void __rcu_read_unlock(void)
-{
- preempt_enable();
-}
-
-#define synchronize_rcu synchronize_sched
-
static inline void exit_rcu(void)
{
}
-static inline int rcu_preempt_depth(void)
-{
- return 0;
-}
-
#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
-static inline void __rcu_read_lock_bh(void)
-{
- local_bh_disable();
-}
-static inline void __rcu_read_unlock_bh(void)
-{
- local_bh_enable();
-}
-
-extern void call_rcu_sched(struct rcu_head *head,
- void (*func)(struct rcu_head *rcu));
extern void synchronize_rcu_bh(void);
-extern void synchronize_sched(void);
extern void synchronize_rcu_expedited(void);
static inline void synchronize_rcu_bh_expedited(void)
@@ -95,7 +54,7 @@ static inline void synchronize_rcu_bh_expedited(void)
synchronize_sched_expedited();
}
-extern void rcu_check_callbacks(int cpu, int user);
+extern void rcu_barrier(void);
extern long rcu_batches_completed(void);
extern long rcu_batches_completed_bh(void);
@@ -104,18 +63,6 @@ extern void rcu_force_quiescent_state(void);
extern void rcu_bh_force_quiescent_state(void);
extern void rcu_sched_force_quiescent_state(void);
-#ifdef CONFIG_NO_HZ
-void rcu_enter_nohz(void);
-void rcu_exit_nohz(void);
-#else /* CONFIG_NO_HZ */
-static inline void rcu_enter_nohz(void)
-{
-}
-static inline void rcu_exit_nohz(void)
-{
-}
-#endif /* CONFIG_NO_HZ */
-
/* A context switch is a grace period for RCU-sched and RCU-bh. */
static inline int rcu_blocking_is_gp(void)
{
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ce160d6..15b332d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1202,13 +1202,19 @@ struct task_struct {
unsigned int policy;
cpumask_t cpus_allowed;
-#ifdef CONFIG_TREE_PREEMPT_RCU
+#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
char rcu_read_unlock_special;
- struct rcu_node *rcu_blocked_node;
struct list_head rcu_node_entry;
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
+#ifdef CONFIG_TREE_PREEMPT_RCU
+ struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+#ifdef CONFIG_RCURING_PREEMPT
+ struct rcu_task_preempt rcu_task_preempt;
+#endif
+
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
#endif
@@ -1288,9 +1294,9 @@ struct task_struct {
struct list_head cpu_timers[3];
/* process credentials */
- const struct cred *real_cred; /* objective and real subjective task
+ const struct cred __rcu *real_cred; /* objective and real subjective task
* credentials (COW) */
- const struct cred *cred; /* effective (overridable) subjective task
+ const struct cred __rcu *cred; /* effective (overridable) subjective task
* credentials (COW) */
struct mutex cred_guard_mutex; /* guard against foreign influences on
* credential calculations
@@ -1418,7 +1424,7 @@ struct task_struct {
#endif
#ifdef CONFIG_CGROUPS
/* Control Group info protected by css_set_lock */
- struct css_set *cgroups;
+ struct css_set __rcu *cgroups;
/* cg_list protected by css_set_lock and tsk->alloc_lock */
struct list_head cg_list;
#endif
@@ -1740,7 +1746,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
#define used_math() tsk_used_math(current)
-#ifdef CONFIG_TREE_PREEMPT_RCU
+#ifdef CONFIG_PREEMPT_RCU
#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
@@ -1749,11 +1755,13 @@ static inline void rcu_copy_process(struct task_struct *p)
{
p->rcu_read_lock_nesting = 0;
p->rcu_read_unlock_special = 0;
+#ifdef CONFIG_TREE_PREEMPT_RCU
p->rcu_blocked_node = NULL;
+#endif
INIT_LIST_HEAD(&p->rcu_node_entry);
}
-#else
+#elif !defined(CONFIG_RCURING)
static inline void rcu_copy_process(struct task_struct *p)
{
@@ -2109,7 +2117,9 @@ extern void daemonize(const char *, ...);
extern int allow_signal(int);
extern int disallow_signal(int);
-extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
+extern int do_execve(const char *,
+ const char __user * const __user *,
+ const char __user * const __user *, struct pt_regs *);
extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
struct task_struct *fork_idle(int);
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index ae0a528..92e52a1 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -213,6 +213,9 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv)
* @dma_alignment: SPI controller constraint on DMA buffers alignment.
* @mode_bits: flags understood by this controller driver
* @flags: other constraints relevant to this driver
+ * @bus_lock_spinlock: spinlock for SPI bus locking
+ * @bus_lock_mutex: mutex for SPI bus locking
+ * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use
* @setup: updates the device mode and clocking records used by a
* device's SPI controller; protocol code may call this. This
* must fail if an unrecognized or unsupported mode is requested.
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 4d5d2f5..58971e8 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -108,19 +108,43 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
/**
- * srcu_dereference - fetch SRCU-protected pointer with checking
+ * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing
+ * @p: the pointer to fetch and protect for later dereferencing
+ * @sp: pointer to the srcu_struct, which is used to check that we
+ * really are in an SRCU read-side critical section.
+ * @c: condition to check for update-side use
*
- * Makes rcu_dereference_check() do the dirty work.
+ * If PROVE_RCU is enabled, invoking this outside of an RCU read-side
+ * critical section will result in an RCU-lockdep splat, unless @c evaluates
+ * to 1. The @c argument will normally be a logical expression containing
+ * lockdep_is_held() calls.
*/
-#define srcu_dereference(p, sp) \
- rcu_dereference_check(p, srcu_read_lock_held(sp))
+#define srcu_dereference_check(p, sp, c) \
+ __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu)
+
+/**
+ * srcu_dereference - fetch SRCU-protected pointer for later dereferencing
+ * @p: the pointer to fetch and protect for later dereferencing
+ * @sp: pointer to the srcu_struct, which is used to check that we
+ * really are in an SRCU read-side critical section.
+ *
+ * Makes srcu_dereference_check() do the dirty work. If PROVE_RCU
+ * is enabled, invoking this outside of an RCU read-side critical
+ * section will result in an RCU-lockdep splat.
+ */
+#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0)
/**
* srcu_read_lock - register a new reader for an SRCU-protected structure.
* @sp: srcu_struct in which to register the new reader.
*
* Enter an SRCU read-side critical section. Note that SRCU read-side
- * critical sections may be nested.
+ * critical sections may be nested. However, it is illegal to
+ * call anything that waits on an SRCU grace period for the same
+ * srcu_struct, whether directly or indirectly. Please note that
+ * one way to indirectly wait on an SRCU grace period is to acquire
+ * a mutex that is held elsewhere while calling synchronize_srcu() or
+ * synchronize_srcu_expedited().
*/
static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
{
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index 671538d..8eee9db 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -69,7 +69,7 @@ struct gss_cl_ctx {
enum rpc_gss_proc gc_proc;
u32 gc_seq;
spinlock_t gc_seq_lock;
- struct gss_ctx *gc_gss_ctx;
+ struct gss_ctx __rcu *gc_gss_ctx;
struct xdr_netobj gc_wire_ctx;
u32 gc_win;
unsigned long gc_expiry;
@@ -80,7 +80,7 @@ struct gss_upcall_msg;
struct gss_cred {
struct rpc_cred gc_base;
enum rpc_gss_svc gc_service;
- struct gss_cl_ctx *gc_ctx;
+ struct gss_cl_ctx __rcu *gc_ctx;
struct gss_upcall_msg *gc_upcall;
unsigned long gc_upcall_timestamp;
unsigned char gc_machine_cred : 1;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 6e5d197..e6319d1 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -820,7 +820,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
u64 mask, int fd,
const char __user *pathname);
-int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
+int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]);
asmlinkage long sys_perf_event_open(
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 1437da3..67d64e6 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -329,6 +329,13 @@ struct tty_struct {
struct tty_port *port;
};
+/* Each of a tty's open files has private_data pointing to tty_file_private */
+struct tty_file_private {
+ struct tty_struct *tty; /* the tty that this open file refers to */
+ struct file *file; /* NOTE(review): presumably the file whose private_data points here - confirm */
+ struct list_head list; /* NOTE(review): presumably links this entry into a per-tty list of files - confirm */
+};
+
/* tty magic number */
#define TTY_MAGIC 0x5401
@@ -458,6 +465,7 @@ extern void proc_clear_tty(struct task_struct *p);
extern struct tty_struct *get_current_tty(void);
extern void tty_default_fops(struct file_operations *fops);
extern struct tty_struct *alloc_tty_struct(void);
+extern void tty_add_file(struct tty_struct *tty, struct file *file);
extern void free_tty_struct(struct tty_struct *tty);
extern void initialize_tty_struct(struct tty_struct *tty,
struct tty_driver *driver, int idx);
@@ -470,6 +478,7 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty);
extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
extern struct mutex tty_mutex;
+extern spinlock_t tty_files_lock;
extern void tty_write_unlock(struct tty_struct *tty);
extern int tty_write_lock(struct tty_struct *tty, int ndelay);
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 726cc35..dd1fdb8 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -45,7 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p)
return 0;
rcu_read_lock();
- id = rcu_dereference(net_cls_subsys_id);
+ id = rcu_dereference_index_check(net_cls_subsys_id,
+ rcu_read_lock_held());
if (id >= 0)
classid = container_of(task_subsys_state(p, id),
struct cgroup_cls_state, css)->classid;
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index e624dae..caf17db 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -75,7 +75,7 @@ struct nf_conntrack_helper;
/* nf_conn feature for connections that have a helper */
struct nf_conn_help {
/* Helper. if any */
- struct nf_conntrack_helper *helper;
+ struct nf_conntrack_helper __rcu *helper;
union nf_conntrack_help help;
diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h
index 6a664c3..7dc97d1 100644
--- a/include/sound/emu10k1.h
+++ b/include/sound/emu10k1.h
@@ -1707,6 +1707,7 @@ struct snd_emu10k1 {
unsigned int card_type; /* EMU10K1_CARD_* */
unsigned int ecard_ctrl; /* ecard control bits */
unsigned long dma_mask; /* PCI DMA mask */
+ unsigned int delay_pcm_irq; /* in samples */
int max_cache_pages; /* max memory size / PAGE_SIZE */
struct snd_dma_buffer silent_page; /* silent page */
struct snd_dma_buffer ptb_pages; /* page table pages */
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
new file mode 100644
index 0000000..49682d7
--- /dev/null
+++ b/include/trace/events/workqueue.h
@@ -0,0 +1,62 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM workqueue
+
+#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WORKQUEUE_H
+
+#include
+#include
+
+/**
+ * workqueue_execute_start - called immediately before the workqueue callback
+ * @work: pointer to struct work_struct
+ *
+ * Allows tracking of workqueue execution.
+ */
+TRACE_EVENT(workqueue_execute_start,
+
+ TP_PROTO(struct work_struct *work),
+
+ TP_ARGS(work),
+
+ TP_STRUCT__entry(
+ __field( void *, work )
+ __field( void *, function)
+ ),
+
+ TP_fast_assign(
+ __entry->work = work;
+ __entry->function = work->func; /* the callback, printed via %pf below */
+ ),
+
+ TP_printk("work struct %p: function %pf", __entry->work, __entry->function)
+);
+
+/**
+ * workqueue_execute_end - called immediately after the workqueue callback
+ * @work: pointer to struct work_struct
+ *
+ * Allows tracking of workqueue execution.
+ */
+TRACE_EVENT(workqueue_execute_end,
+
+ TP_PROTO(struct work_struct *work),
+
+ TP_ARGS(work),
+
+ TP_STRUCT__entry(
+ __field( void *, work )
+ ),
+
+ TP_fast_assign(
+ __entry->work = work; /* only the work pointer is recorded for the end event */
+ ),
+
+ TP_printk("work struct %p", __entry->work)
+);
+
+
+#endif /* _TRACE_WORKQUEUE_H */
+
+/* This part must be outside protection */
+#include
diff --git a/init/Kconfig b/init/Kconfig
index 2de5b1c..48e58d9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -340,6 +340,7 @@ choice
config TREE_RCU
bool "Tree-based hierarchical RCU"
+ depends on !PREEMPT && SMP
help
This option selects the RCU implementation that is
designed for very large SMP system with hundreds or
@@ -347,7 +348,7 @@ config TREE_RCU
smaller systems.
config TREE_PREEMPT_RCU
- bool "Preemptable tree-based hierarchical RCU"
+ bool "Preemptible tree-based hierarchical RCU"
depends on PREEMPT
help
This option selects the RCU implementation that is
@@ -365,8 +366,25 @@ config TINY_RCU
is not required. This option greatly reduces the
memory footprint of RCU.
+config TINY_PREEMPT_RCU
+ bool "Preemptible UP-only small-memory-footprint RCU"
+ depends on !SMP && PREEMPT
+ help
+ This option selects the RCU implementation that is designed
+ for real-time UP systems. This option greatly reduces the
+ memory footprint of RCU.
+
+config RCURING
+ bool "Multi-GP ring based RCU"
+
endchoice
+config PREEMPT_RCU
+ def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU )
+ help
+ This option enables preemptible-RCU code that is common between
+ the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations.
+
config RCU_TRACE
bool "Enable tracing for RCU"
depends on TREE_RCU || TREE_PREEMPT_RCU
@@ -387,9 +405,12 @@ config RCU_FANOUT
help
This option controls the fanout of hierarchical implementations
of RCU, allowing RCU to work efficiently on machines with
- large numbers of CPUs. This value must be at least the cube
- root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit
- systems and up to 262,144 for 64-bit systems.
+ large numbers of CPUs. This value must be at least the fourth
+ root of NR_CPUS, which allows NR_CPUS to be insanely large.
+ The default value of RCU_FANOUT should be used for production
+ systems, but if you are stress-testing the RCU implementation
+ itself, small RCU_FANOUT values allow you to test large-system
+ code paths on small(er) systems.
Select a specific number if testing RCU itself.
Take the default if unsure.
@@ -432,6 +453,35 @@ config TREE_RCU_TRACE
TREE_PREEMPT_RCU implementations, permitting Makefile to
trivially select kernel/rcutree_trace.c.
+config RCURING_BH
+ bool "RCU for bh"
+ default y
+ depends on RCURING && !PREEMPT_RT
+ help
+ If Y, use an independent RCU domain for rcu_bh.
+ If N, rcu_bh will map to rcu_sched (except rcu_read_lock_bh,
+ rcu_read_unlock_bh).
+
+config RCURING_BH_PREEMPT
+ bool
+ depends on PREEMPT_RT
+ help
+ rcu_bh will map to rcu_preempt.
+
+config RCURING_PREEMPT
+ bool "RCURING based preemptible RCU"
+ default n
+ depends on RCURING && PREEMPT
+ help
+ If Y, normal rcu functionality will map to rcu_preempt.
+ If N, normal rcu functionality will map to rcu_sched.
+
+config RCURING_COUNTERS_SHIFT
+ int "RCURING's counter shift"
+ range 1 10
+ depends on RCURING
+ default 4
+
endmenu # "RCU Subsystem"
config IKCONFIG
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index 2b10853..3098a38 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -24,10 +24,11 @@ static int __init no_initrd(char *str)
__setup("noinitrd", no_initrd);
-static int __init do_linuxrc(void * shell)
+static int __init do_linuxrc(void *_shell)
{
- static char *argv[] = { "linuxrc", NULL, };
- extern char * envp_init[];
+ static const char *argv[] = { "linuxrc", NULL, };
+ extern const char *envp_init[];
+ const char *shell = _shell;
sys_close(old_fd);sys_close(root_fd);
sys_setsid();
diff --git a/init/main.c b/init/main.c
index 22d61cb..94ab488 100644
--- a/init/main.c
+++ b/init/main.c
@@ -197,8 +197,8 @@ static int __init set_reset_devices(char *str)
__setup("reset_devices", set_reset_devices);
-static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
-char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
+static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
static const char *panic_later, *panic_param;
extern const struct obs_kernel_param __setup_start[], __setup_end[];
@@ -809,7 +809,7 @@ static void __init do_pre_smp_initcalls(void)
do_one_initcall(*fn);
}
-static void run_init_process(char *init_filename)
+static void run_init_process(const char *init_filename)
{
argv_init[0] = init_filename;
kernel_execve(init_filename, argv_init, envp_init);
diff --git a/kernel/Makefile b/kernel/Makefile
index 0b72d1a..9681a3c 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -86,6 +86,8 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o
obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o
obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o
obj-$(CONFIG_TINY_RCU) += rcutiny.o
+obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o
+obj-$(CONFIG_RCURING) += rcuring.o
obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 192f88c..e5c5497 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -138,7 +138,7 @@ struct css_id {
* is called after synchronize_rcu(). But for safe use, css_is_removed()
* css_tryget() should be used for avoiding race.
*/
- struct cgroup_subsys_state *css;
+ struct cgroup_subsys_state __rcu *css;
/*
* ID of this css.
*/
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
index c438f54..be775f7 100644
--- a/kernel/debug/kdb/kdb_private.h
+++ b/kernel/debug/kdb/kdb_private.h
@@ -255,7 +255,14 @@ extern void kdb_ps1(const struct task_struct *p);
extern void kdb_print_nameval(const char *name, unsigned long val);
extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
extern void kdb_meminfo_proc_show(void);
+#ifdef CONFIG_KALLSYMS
extern const char *kdb_walk_kallsyms(loff_t *pos);
+#else /* ! CONFIG_KALLSYMS */
+static inline const char *kdb_walk_kallsyms(loff_t *pos)
+{
+ return NULL;
+}
+#endif /* ! CONFIG_KALLSYMS */
extern char *kdb_getstr(char *, size_t, char *);
/* Defines for kdb_symbol_print */
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
index 45344d5..6b2485d 100644
--- a/kernel/debug/kdb/kdb_support.c
+++ b/kernel/debug/kdb/kdb_support.c
@@ -82,8 +82,8 @@ static char *kdb_name_table[100]; /* arbitrary size */
int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
{
int ret = 0;
- unsigned long symbolsize;
- unsigned long offset;
+ unsigned long symbolsize = 0;
+ unsigned long offset = 0;
#define knt1_size 128 /* must be >= kallsyms table size */
char *knt1 = NULL;
diff --git a/kernel/exit.c b/kernel/exit.c
index 671ed56..0312022 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1386,8 +1386,7 @@ static int wait_task_stopped(struct wait_opts *wo,
if (!unlikely(wo->wo_flags & WNOWAIT))
*p_code = 0;
- /* don't need the RCU readlock here as we're holding a spinlock */
- uid = __task_cred(p)->uid;
+ uid = task_uid(p);
unlock_sig:
spin_unlock_irq(&p->sighand->siglock);
if (!exit_code)
@@ -1460,7 +1459,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
}
if (!unlikely(wo->wo_flags & WNOWAIT))
p->signal->flags &= ~SIGNAL_STOP_CONTINUED;
- uid = __task_cred(p)->uid;
+ uid = task_uid(p);
spin_unlock_irq(&p->sighand->siglock);
pid = task_pid_vnr(p);
diff --git a/kernel/fork.c b/kernel/fork.c
index 98b4508..b7e9d60 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -300,7 +300,7 @@ out:
#ifdef CONFIG_MMU
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
- struct vm_area_struct *mpnt, *tmp, **pprev;
+ struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
struct rb_node **rb_link, *rb_parent;
int retval;
unsigned long charge;
@@ -328,6 +328,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
if (retval)
goto out;
+ prev = NULL;
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
struct file *file;
@@ -359,7 +360,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
goto fail_nomem_anon_vma_fork;
tmp->vm_flags &= ~VM_LOCKED;
tmp->vm_mm = mm;
- tmp->vm_next = NULL;
+ tmp->vm_next = tmp->vm_prev = NULL;
file = tmp->vm_file;
if (file) {
struct inode *inode = file->f_path.dentry->d_inode;
@@ -392,6 +393,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
*/
*pprev = tmp;
pprev = &tmp->vm_next;
+ tmp->vm_prev = prev;
+ prev = tmp;
__vma_link_rb(mm, tmp, rb_link, rb_parent);
rb_link = &tmp->vm_rb.rb_right;
@@ -752,13 +755,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
struct fs_struct *fs = current->fs;
if (clone_flags & CLONE_FS) {
/* tsk->fs is already what we want */
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
if (fs->in_exec) {
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
return -EAGAIN;
}
fs->users++;
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
return 0;
}
tsk->fs = copy_fs_struct(fs);
@@ -1676,13 +1679,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
if (new_fs) {
fs = current->fs;
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
current->fs = new_fs;
if (--fs->users)
new_fs = NULL;
else
new_fs = fs;
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
}
if (new_mm) {
diff --git a/kernel/kernel-offsets.c b/kernel/kernel-offsets.c
new file mode 100644
index 0000000..0d30817
--- /dev/null
+++ b/kernel/kernel-offsets.c
@@ -0,0 +1,20 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ *
+ * Copyright (C) 2008-2010 Lai Jiangshan
+ */
+
+#define __GENERATING_KERNEL_OFFSETS__
+#include
+#include
+#include
+
+void foo(void); /* prototype for the non-static definition that follows */
+
+void foo(void)
+{
+#ifdef CONFIG_RCURING_PREEMPT
+ OFFSET(TASK_RCU_PREEMPT, task_struct, rcu_task_preempt); /* NOTE(review): presumably emits the offset of task_struct.rcu_task_preempt for assembly use - confirm against OFFSET() */
+#endif /* CONFIG_RCURING_PREEMPT */
+}
+
diff --git a/kernel/kfifo.c b/kernel/kfifo.c
index 4502604..6b5580c 100644
--- a/kernel/kfifo.c
+++ b/kernel/kfifo.c
@@ -503,6 +503,15 @@ unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf,
}
EXPORT_SYMBOL(__kfifo_out_r);
+void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize)
+{
+	/* NOTE(review): presumably the length held in the record header. */
+	unsigned int len = __kfifo_peek_n(fifo, recsize);
+
+	fifo->out += len + recsize;	/* advance the out position by both */
+}
+EXPORT_SYMBOL(__kfifo_skip_r);
+
int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from,
unsigned long len, unsigned int *copied, size_t recsize)
{
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 6e9b196..9cd0591 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -153,7 +153,9 @@ static int ____call_usermodehelper(void *data)
goto fail;
}
- retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp);
+ retval = kernel_execve(sub_info->path,
+ (const char *const *)sub_info->argv,
+ (const char *const *)sub_info->envp);
/* Exec failed? */
fail:
diff --git a/kernel/pid.c b/kernel/pid.c
index d55c6fb..39b65b6 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -401,7 +401,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type)
struct task_struct *result = NULL;
if (pid) {
struct hlist_node *first;
- first = rcu_dereference_check(pid->tasks[type].first,
+ first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
rcu_read_lock_held() ||
lockdep_tasklist_lock_is_held());
if (first)
@@ -416,6 +416,7 @@ EXPORT_SYMBOL(pid_task);
*/
struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
{
+ rcu_lockdep_assert(rcu_read_lock_held());
return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
}
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 4d16983..6c79e85 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -73,12 +73,14 @@ int debug_lockdep_rcu_enabled(void)
EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
/**
- * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section?
+ * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
*
* Check for bottom half being disabled, which covers both the
* CONFIG_PROVE_RCU and not cases. Note that if someone uses
* rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled)
- * will show the situation.
+ * will show the situation. This is useful for debug checks in functions
+ * that require that they be called within an RCU read-side critical
+ * section.
*
* Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
*/
diff --git a/kernel/rcuring.c b/kernel/rcuring.c
new file mode 100644
index 0000000..e7cedb5
--- /dev/null
+++ b/kernel/rcuring.c
@@ -0,0 +1,1002 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2008
+ * Copyright Fujitsu 2008-2010 Lai Jiangshan
+ *
+ * Authors: Dipankar Sarma
+ * Manfred Spraul
+ * Paul E. McKenney Hierarchical version
+ * Lai Jiangshan RCURING version
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+ * Geometry of the counter ring: it has (1 << RCURING_IDX_SHIFT) slots, and
+ * RCURING_IDX() maps a complete number to its slot by keeping only the low
+ * RCURING_IDX_SHIFT bits.
+ */
+#define RCURING_IDX_SHIFT CONFIG_RCURING_COUNTERS_SHIFT
+#define RCURING_IDX_MAX (1L << RCURING_IDX_SHIFT)
+#define RCURING_IDX_MASK (RCURING_IDX_MAX - 1)
+#define RCURING_IDX(complete) ((complete) & RCURING_IDX_MASK)
+
+/*
+ * Ring of counters shared by one RCU domain.
+ *
+ * @ctr: one counter per ring slot; a complete number selects its slot
+ * through RCURING_IDX().
+ * @curr_complete: complete number whose slot rcuring_lock() takes
+ * references on.
+ * @done_complete: last complete number that
+ * __rcuring_advance_done_complete() found finished.
+ * @lock: held around the __rcuring_advance_*() helpers.
+ */
+struct rcuring {
+ atomic_t ctr[RCURING_IDX_MAX];
+ long curr_complete;
+ long done_complete;
+
+ raw_spinlock_t lock;
+};
+
+/*
+ * For historical reasons the complete numbers start at -300.
+ *
+ * RCURING_INIT() is a positional initializer for struct rcuring: the slot
+ * of the initial complete starts with a count of 1, curr_complete is the
+ * initial complete and done_complete is the one just before it.
+ *
+ * The constant is parenthesized so that it stays a single operand wherever
+ * the macro is expanded.
+ */
+#define RCURING_INIT_COMPLETE (-300L)
+#define RCURING_INIT(name) \
+{ \
+	{[RCURING_IDX(RCURING_INIT_COMPLETE)] = ATOMIC_INIT(1),}, \
+	RCURING_INIT_COMPLETE, \
+	RCURING_INIT_COMPLETE - 1, \
+	__RAW_SPIN_LOCK_UNLOCKED(name.lock), \
+}
+
+/*
+ * Read the counter of the ring slot that @complete maps to.
+ * The read is bracketed by compiler barriers on both sides.
+ */
+static inline
+int rcuring_ctr_read(struct rcuring *rr, unsigned long complete)
+{
+ int res;
+
+ /* atomic_read() is not guaranteed to imply barrier(). */
+ barrier();
+ res = atomic_read(rr->ctr + RCURING_IDX(complete));
+ barrier();
+
+ return res;
+}
+
+/*
+ * Advance rr->done_complete past every consecutive ring slot whose counter
+ * reads zero, starting with the slot right after the current done_complete.
+ * done_complete is left untouched when that first slot is still non-zero.
+ * The callers in this file hold rr->lock.
+ */
+void __rcuring_advance_done_complete(struct rcuring *rr)
+{
+	long next = rr->done_complete + 1;
+
+	/* The very next slot is still in use: nothing to advance. */
+	if (rcuring_ctr_read(rr, next))
+		return;
+
+	/* Skip over every further slot that also reads zero. */
+	for (next++; !rcuring_ctr_read(rr, next); next++)
+		;
+
+	/* @next is the first busy slot, so the one before it is done. */
+	ACCESS_ONCE(rr->done_complete) = next - 1;
+}
+
+/*
+ * Lockless hint: returns 1 when the slot right after done_complete reads
+ * zero, i.e. when __rcuring_advance_done_complete() would make progress,
+ * and 0 otherwise.
+ */
+static inline
+int rcuring_could_advance_done_complete(struct rcuring *rr)
+{
+	long wait_complete = rr->done_complete + 1;
+
+	return rcuring_ctr_read(rr, wait_complete) == 0;
+}
+
+/*
+ * Opportunistically advance done_complete. Nothing is done when the
+ * lockless check says there is no progress to make, or when rr->lock cannot
+ * be taken without spinning.
+ */
+static inline
+void rcuring_advance_done_complete(struct rcuring *rr)
+{
+	if (!rcuring_could_advance_done_complete(rr))
+		return;
+
+	if (!__raw_spin_trylock(&rr->lock))
+		return;
+
+	__rcuring_advance_done_complete(rr);
+	__raw_spin_unlock(&rr->lock);
+}
+
+/*
+ * Move curr_complete forward by exactly one step, to @next_complete.
+ *
+ * Nothing happens unless @next_complete is curr_complete + 1 and its ring
+ * slot currently reads zero. When both hold, curr_complete is published
+ * first, then the new slot's counter is incremented, and only after that is
+ * the old slot's counter decremented. The callers in this file hold
+ * rr->lock.
+ */
+void __rcuring_advance_curr_complete(struct rcuring *rr, long next_complete)
+{
+ long curr_complete = rr->curr_complete;
+
+ if (curr_complete + 1 == next_complete) {
+ if (!rcuring_ctr_read(rr, next_complete)) {
+ ACCESS_ONCE(rr->curr_complete) = next_complete;
+ smp_mb__before_atomic_inc();
+ atomic_inc(rr->ctr + RCURING_IDX(next_complete));
+ smp_mb__after_atomic_inc();
+ atomic_dec(rr->ctr + RCURING_IDX(curr_complete));
+ }
+ }
+}
+
+/*
+ * Lockless hint: returns 1 when either done_complete could be advanced, or
+ * curr_complete could be stepped to @next_complete (it is the direct
+ * successor and its slot reads zero). Returns 0 otherwise.
+ */
+static inline
+int rcuring_could_advance_complete(struct rcuring *rr, long next_complete)
+{
+	if (rcuring_could_advance_done_complete(rr))
+		return 1;
+
+	/* Only a single step forward is ever considered. */
+	if (rr->curr_complete + 1 != next_complete)
+		return 0;
+
+	return !rcuring_ctr_read(rr, next_complete);
+}
+
+/*
+ * Opportunistically advance done_complete and then try to step
+ * curr_complete to @next_complete. Gives up silently when the lockless
+ * check sees no progress to make or when rr->lock is contended.
+ */
+static inline
+void rcuring_advance_complete(struct rcuring *rr, long next_complete)
+{
+	if (!rcuring_could_advance_complete(rr, next_complete))
+		return;
+
+	if (!__raw_spin_trylock(&rr->lock))
+		return;
+
+	__rcuring_advance_done_complete(rr);
+	__rcuring_advance_curr_complete(rr, next_complete);
+	__raw_spin_unlock(&rr->lock);
+}
+
+/*
+ * Same as rcuring_advance_complete(), except that rr->lock is acquired
+ * unconditionally (spinning if needed) instead of with a trylock.
+ */
+static inline
+void rcuring_advance_complete_force(struct rcuring *rr, long next_complete)
+{
+	if (!rcuring_could_advance_complete(rr, next_complete))
+		return;
+
+	__raw_spin_lock(&rr->lock);
+	__rcuring_advance_done_complete(rr);
+	__rcuring_advance_curr_complete(rr, next_complete);
+	__raw_spin_unlock(&rr->lock);
+}
+
+/*
+ * Rebuild a full complete number from the ring slot index @locked_idx.
+ *
+ * The upper bits are taken from @complete. If that candidate would lie
+ * after @complete, it is moved back by one ring length, so the result is
+ * never ahead of @complete. The comparison is done on the difference so
+ * that it tolerates wraparound of the complete numbers.
+ */
+static inline long __locked_complete_adjust(int locked_idx, long complete)
+{
+ long locked_complete;
+
+ locked_complete = (complete & ~RCURING_IDX_MASK) | (long)locked_idx;
+ if (locked_complete - complete <= 0)
+ return locked_complete;
+ else
+ return locked_complete - RCURING_IDX_MAX;
+}
+
+/*
+ * Take a reference on the ring slot of the current complete.
+ *
+ * The loop retries until atomic_inc_not_zero() succeeds on the slot that
+ * curr_complete mapped to when it was sampled. curr_complete is then read
+ * again; if it maps to another slot by now, the returned complete number is
+ * reconstructed from the slot index that was actually taken.
+ *
+ * Returns the complete number that was locked, for use with
+ * rcuring_unlock().
+ */
+static long rcuring_lock(struct rcuring *rr)
+{
+ long locked_complete;
+ int idx;
+
+ for (;;) {
+ idx = RCURING_IDX(ACCESS_ONCE(rr->curr_complete));
+ if (likely(atomic_inc_not_zero(rr->ctr + idx)))
+ break;
+ }
+ smp_mb__after_atomic_inc();
+
+ locked_complete = ACCESS_ONCE(rr->curr_complete);
+ if (likely(RCURING_IDX(locked_complete) == idx))
+ return locked_complete;
+ else
+ return __locked_complete_adjust(idx, locked_complete);
+}
+
+/*
+ * Drop one reference from the ring slot that @locked_complete maps to.
+ * Only the slot index of @locked_complete is used. A memory barrier is
+ * issued before the decrement.
+ */
+static void rcuring_unlock(struct rcuring *rr, long locked_complete)
+{
+ smp_mb__before_atomic_dec();
+
+ atomic_dec(rr->ctr + RCURING_IDX(locked_complete));
+}
+
+/*
+ * Take one more reference on the ring slot of @locked_complete.
+ * The increment is unconditional; presumably the caller already holds a
+ * reference on that slot - TODO confirm against the callers.
+ */
+static inline
+void rcuring_dup_lock(struct rcuring *rr, long locked_complete)
+{
+ atomic_inc(rr->ctr + RCURING_IDX(locked_complete));
+}
+
+/*
+ * Move a held reference forward to the current complete.
+ *
+ * When @locked_complete already equals rr->curr_complete it is returned
+ * unchanged. Otherwise a new reference is taken with rcuring_lock() and
+ * the old one is released; the newly locked complete is returned.
+ */
+static inline
+long rcuring_advance_lock(struct rcuring *rr, long locked_complete)
+{
+	long fresh_complete;
+
+	if (locked_complete == rr->curr_complete)
+		return locked_complete;
+
+	/*
+	 * Acquire the new complete before dropping the old one, so that a
+	 * reference is held at every instant, even if an NMI/SMI hits in
+	 * between the two steps.
+	 */
+	fresh_complete = rcuring_lock(rr);
+	rcuring_unlock(rr, locked_complete);
+
+	return fresh_complete;
+}
+
+/* Sample rr->done_complete once, without taking rr->lock. */
+static inline long rcuring_get_done_complete(struct rcuring *rr)
+{
+ return ACCESS_ONCE(rr->done_complete);
+}
+
+/* Sample rr->curr_complete once, without taking rr->lock. */
+static inline long rcuring_get_curr_complete(struct rcuring *rr)
+{
+ return ACCESS_ONCE(rr->curr_complete);
+}
+
+/*
+ * Singly linked queue of callbacks.
+ * @list is the first element (NULL when empty); @tail points at the
+ * pointer that must be written to append an element, i.e. at @list itself
+ * when the queue is empty or at the ->next field of the last element.
+ */
+struct rcu_batch {
+ struct rcu_head *list, **tail;
+};
+
+/*
+ * Append every callback queued on @from to the end of @to and leave @from
+ * empty. Does nothing when @from holds no callbacks.
+ */
+static void rcu_batch_merge(struct rcu_batch *to, struct rcu_batch *from)
+{
+	if (from->list == NULL)
+		return;
+
+	/* Splice the whole chain onto the end of @to. */
+	*to->tail = from->list;
+	to->tail = from->tail;
+
+	/* Reset @from to the empty state. */
+	from->list = NULL;
+	from->tail = &from->list;
+}
+
+/*
+ * Append the single callback @new to the end of @batch.
+ * @new->next is not touched here; __call_rcu() clears it before queuing.
+ */
+static void rcu_batch_add(struct rcu_batch *batch, struct rcu_head *new)
+{
+ *batch->tail = new;
+ batch->tail = &new->next;
+}
+
+/* Per-CPU, per-domain callback bookkeeping. */
+struct rcu_data {
+ /* complete number this CPU currently holds a ring reference on */
+ long locked_complete;
+
+ /*
+ * Callbacks are in batches (done_complete, curr_complete]
+ * curr_complete - done_complete >= 0
+ * curr_complete - done_complete <= RCURING_IDX_MAX
+ * domain's curr_complete - curr_complete >= 0
+ * domain's done_complete - done_complete >= 0
+ *
+ * curr_complete == done_complete just means @batch is empty.
+ * We have at least one callback in batch[RCURING_IDX(done_complete)]
+ * if curr_complete != done_complete.
+ */
+ long curr_complete;
+ long done_complete;
+
+ /* callbacks still waiting, one queue per ring slot */
+ struct rcu_batch batch[RCURING_IDX_MAX];
+ /* callbacks moved here by do_advance_callbacks(); run by rcu_do_batch() */
+ struct rcu_batch done_batch;
+
+ /* number of callbacks queued on this CPU and not yet invoked */
+ unsigned int qlen;
+ /* timestamp compared against when deciding to force a quiescent state */
+ unsigned long jiffies;
+ /* timestamp compared against when deciding to print a stall report */
+ unsigned long stall_jiffies;
+};
+
+/*
+ * One RCU flavour: its counter ring, its per-CPU callback data and the
+ * hook used to force quiescent states.
+ */
+struct rcu_domain {
+ struct rcuring rcuring;
+ const char *domain_name; /* printed in stall reports */
+ struct rcu_data __percpu *rcu_data;
+
+ spinlock_t lock; /* this lock is for fqs or other misc things */
+ long fqs_complete; /* last complete passed to __force_quiescent_state() */
+ void (*force_quiescent_state)(struct rcu_domain *domain,
+ long fqs_complete);
+};
+
+/*
+ * Number of ring slots that __rcu_check_callbacks() keeps back: it only
+ * tries to advance the complete while curr - done does not exceed
+ * RCURING_IDX_MAX - gp_reserved().
+ */
+#define EXPEDITED_GP_RESERVED_RECOMMEND (RCURING_IDX_SHIFT - 1)
+
+/* Reserved slot count for @domain; currently the same for every domain. */
+static inline long gp_reserved(struct rcu_domain *domain)
+{
+ return EXPEDITED_GP_RESERVED_RECOMMEND;
+}
+
+/*
+ * Put every queue of @rdp into the empty state by pointing each tail at its
+ * own list head. The list heads themselves are not written here.
+ */
+static inline void __init rcu_data_init(struct rcu_data *rdp)
+{
+	struct rcu_batch *slot;
+	struct rcu_batch *end = rdp->batch + RCURING_IDX_MAX;
+
+	for (slot = rdp->batch; slot != end; slot++)
+		slot->tail = &slot->list;
+
+	rdp->done_batch.tail = &rdp->done_batch.list;
+}
+
+/*
+ * Step rdp->done_complete forward one complete at a time until it equals
+ * @done_complete, moving the callbacks of every slot passed on the way onto
+ * rdp->done_batch.
+ */
+static void do_advance_callbacks(struct rcu_data *rdp, long done_complete)
+{
+	while (rdp->done_complete != done_complete) {
+		long finished = rdp->done_complete + 1;
+
+		rdp->done_complete = finished;
+		rcu_batch_merge(&rdp->done_batch,
+				&rdp->batch[RCURING_IDX(finished)]);
+	}
+}
+
+/*
+ * Is @value in the half-open range (@left_open, @right_close]?
+ *
+ * Both bounds are tested through the sign of a difference rather than by
+ * direct comparison, so the check keeps working when the complete numbers
+ * wrap around.
+ */
+static inline bool in_range(long left_open, long right_close, long value)
+{
+ return left_open - value < 0 && value - right_close <= 0;
+}
+
+/*
+ * Bring @rdp up to date with the domain-wide @done_complete and
+ * @curr_complete.
+ *
+ * If rdp->curr_complete still lies in (done_complete, curr_complete], only
+ * the batches up to @done_complete are moved to the done queue. Otherwise
+ * every batch @rdp holds is moved, and both of its complete numbers are
+ * reset to @done_complete.
+ */
+static void advance_callbacks(struct rcu_data *rdp, long done_complete,
+		long curr_complete)
+{
+	if (!in_range(done_complete, curr_complete, rdp->curr_complete)) {
+		/* Everything queued on this CPU has already finished. */
+		do_advance_callbacks(rdp, rdp->curr_complete);
+		rdp->curr_complete = done_complete;
+		rdp->done_complete = done_complete;
+		return;
+	}
+
+	do_advance_callbacks(rdp, done_complete);
+}
+
+/*
+ * Ask every online CPU that still holds a complete in
+ * (previously forced complete, @fqs_complete] to reschedule.
+ *
+ * domain->fqs_complete remembers the last complete that was forced; it is
+ * replaced by the domain's done_complete when it is no longer inside
+ * (done_complete, curr_complete]. If that value is already at or past
+ * @fqs_complete, the function returns without doing anything.
+ *
+ * domain->lock is taken with interrupts disabled: this function is reached
+ * both from the timer path (rcu_check_callbacks()) and from the exported
+ * rcu_<domain>_force_quiescent_state(), which does not disable interrupts
+ * itself, so a plain spin_lock() could deadlock against an interrupt on
+ * the same CPU.
+ */
+static void __force_quiescent_state(struct rcu_domain *domain,
+		long fqs_complete)
+{
+	int cpu;
+	unsigned long flags;
+	long fqs_complete_old;
+	long curr_complete = rcuring_get_curr_complete(&domain->rcuring);
+	long done_complete = rcuring_get_done_complete(&domain->rcuring);
+
+	spin_lock_irqsave(&domain->lock, flags);
+
+	fqs_complete_old = domain->fqs_complete;
+	if (!in_range(done_complete, curr_complete, fqs_complete_old))
+		fqs_complete_old = done_complete;
+
+	/* Somebody already forced this complete (or a later one). */
+	if (fqs_complete_old - fqs_complete >= 0) {
+		spin_unlock_irqrestore(&domain->lock, flags);
+		return;
+	}
+
+	domain->fqs_complete = fqs_complete;
+	spin_unlock_irqrestore(&domain->lock, flags);
+
+	for_each_online_cpu(cpu) {
+		struct rcu_data *rdp = per_cpu_ptr(domain->rcu_data, cpu);
+		long locked_complete = ACCESS_ONCE(rdp->locked_complete);
+
+		/* Only poke CPUs holding one of the newly forced completes. */
+		if (!in_range(fqs_complete_old, fqs_complete, locked_complete))
+			continue;
+
+		if (cpu == smp_processor_id())
+			set_tsk_need_resched(current);
+		else
+			smp_send_reschedule(cpu);
+	}
+}
+
+/* Dispatch to the force_quiescent_state hook installed in @domain. */
+static void force_quiescent_state(struct rcu_domain *domain, long fqs_complete)
+{
+ domain->force_quiescent_state(domain, fqs_complete);
+}
+
+/*
+ * Get @rdp ready to accept a callback for the domain's current complete.
+ *
+ * After a full memory barrier the domain's curr/done completes are sampled,
+ * already finished batches of @rdp are moved to its done queue, and
+ * rdp->curr_complete is set to the sampled current complete. When @rdp has
+ * no callbacks queued, both of its timestamps are restarted.
+ *
+ * Returns the sampled current complete, i.e. the batch the caller should
+ * queue into.
+ */
+static
+long prepare_for_new_callback(struct rcu_domain *domain, struct rcu_data *rdp)
+{
+ long curr_complete, done_complete;
+ struct rcuring *rr = &domain->rcuring;
+
+ smp_mb();
+ curr_complete = rcuring_get_curr_complete(rr);
+ done_complete = rcuring_get_done_complete(rr);
+
+ advance_callbacks(rdp, done_complete, curr_complete);
+ rdp->curr_complete = curr_complete;
+
+ if (!rdp->qlen) {
+ rdp->jiffies = jiffies;
+ rdp->stall_jiffies = jiffies;
+ }
+
+ return curr_complete;
+}
+
+/*
+ * Queue @head so that @func is invoked on it later, on this CPU's rcu_data
+ * of @domain.
+ *
+ * The callers in this file invoke it with interrupts disabled (or from
+ * on_each_cpu()). Returns the complete number of the batch the callback was
+ * queued into.
+ */
+static long __call_rcu(struct rcu_domain *domain, struct rcu_head *head,
+ void (*func)(struct rcu_head *))
+{
+ struct rcu_data *rdp;
+ long curr_complete;
+
+ head->next = NULL;
+ head->func = func;
+
+ rdp = this_cpu_ptr(domain->rcu_data);
+ curr_complete = prepare_for_new_callback(domain, rdp);
+ rcu_batch_add(rdp->batch + RCURING_IDX(curr_complete), head);
+ rdp->qlen++;
+
+ return curr_complete;
+}
+
+/*
+ * Dump the state of @domain to the kernel log: the domain-wide done/curr
+ * completes, the counters of the current slot and of the slot being waited
+ * for, and the queue length and completes of every online CPU.
+ */
+static void print_rcuring_stall(struct rcu_domain *domain)
+{
+	struct rcuring *rr = &domain->rcuring;
+	long curr_complete = rcuring_get_curr_complete(rr);
+	long done_complete = rcuring_get_done_complete(rr);
+	int cpu;
+
+	printk(KERN_ERR "RCU is stall, cpu=%d, domain_name=%s\n", smp_processor_id(),
+		domain->domain_name);
+
+	/* The argument order has to follow the "done/curr" label. */
+	printk(KERN_ERR "domain's complete(done/curr)=%ld/%ld\n",
+		done_complete, curr_complete);
+	printk(KERN_ERR "curr_ctr=%d, wait_ctr=%d\n",
+		rcuring_ctr_read(rr, curr_complete),
+		rcuring_ctr_read(rr, done_complete + 1));
+
+	for_each_online_cpu(cpu) {
+		struct rcu_data *rdp = per_cpu_ptr(domain->rcu_data, cpu);
+
+		/* qlen is unsigned int, hence %u. */
+		printk(KERN_ERR "cpu=%d, qlen=%u, complete(locked/done/curr/)="
+			"%ld/%ld/%ld\n", cpu, rdp->qlen,
+			rdp->locked_complete, rdp->done_complete,
+			rdp->curr_complete);
+	}
+}
+
+/*
+ * Periodic per-CPU, per-domain housekeeping.
+ *
+ * Returns at once when @rdp has no callbacks queued. Otherwise it tries to
+ * advance the domain's done_complete, moves finished batches of @rdp to its
+ * done queue, possibly starts the next complete, and finally either kicks
+ * callback processing or checks the force-quiescent-state and stall timers.
+ *
+ * @in_rcu_softirq: non-zero when called from rcu_process_callbacks(), in
+ * which case the softirq is not raised again.
+ */
+static void __rcu_check_callbacks(struct rcu_domain *domain,
+ struct rcu_data *rdp, int in_rcu_softirq)
+{
+ long curr_complete, done_complete;
+ struct rcuring *rr = &domain->rcuring;
+
+ if (!rdp->qlen)
+ return;
+
+ rcuring_advance_done_complete(rr);
+
+ curr_complete = rcuring_get_curr_complete(rr);
+ done_complete = ACCESS_ONCE(rr->done_complete);
+ advance_callbacks(rdp, done_complete, curr_complete);
+
+ /*
+ * This CPU has callbacks queued on the current complete: try to move
+ * the domain on to the next one, unless that would eat into the slots
+ * kept back by gp_reserved().
+ */
+ if (rdp->curr_complete == curr_complete
+ && rdp->batch[RCURING_IDX(rdp->curr_complete)].list) {
+ long max_gp_allow = RCURING_IDX_MAX - gp_reserved(domain);
+
+ if (curr_complete - done_complete <= max_gp_allow)
+ rcuring_advance_complete(rr, curr_complete + 1);
+ }
+
+ if (rdp->done_batch.list) {
+ if (!in_rcu_softirq)
+ raise_softirq(RCU_SOFTIRQ);
+ } else {
+ /* nothing is ready yet: force a quiescent state every 10 jiffies */
+ if (jiffies - rdp->jiffies > 10) {
+ force_quiescent_state(domain, rdp->curr_complete);
+ rdp->jiffies = jiffies;
+ }
+ /* and report a stall every two seconds */
+ if (jiffies - rdp->stall_jiffies > 2 * HZ) {
+ print_rcuring_stall(domain);
+ rdp->stall_jiffies = jiffies;
+ }
+ }
+}
+
+/*
+ * Invoke every callback currently on rdp->done_batch.
+ *
+ * The queue is detached first, then interrupts are enabled while the
+ * callbacks run and disabled again before the bookkeeping is updated, so
+ * the function expects to be entered with interrupts disabled (as
+ * rcu_process_callbacks() does). @domain is not used.
+ */
+static void rcu_do_batch(struct rcu_domain *domain, struct rcu_data *rdp)
+{
+ int count = 0;
+ struct rcu_head *list, *next;
+
+ list = rdp->done_batch.list;
+
+ if (list) {
+ rdp->done_batch.list = NULL;
+ rdp->done_batch.tail = &rdp->done_batch.list;
+
+ local_irq_enable();
+
+ smp_mb();
+
+ while (list) {
+ /* read ->next before the callback runs */
+ next = list->next;
+ prefetch(next);
+ list->func(list);
+ count++;
+ list = next;
+ }
+ local_irq_disable();
+
+ rdp->qlen -= count;
+ rdp->jiffies = jiffies;
+ }
+}
+
+/* Returns 1 when @rdp still has callbacks queued, 0 otherwise. */
+static int __rcu_needs_cpu(struct rcu_data *rdp)
+{
+	return rdp->qlen != 0;
+}
+
+/*
+ * Move the ring reference recorded in @rdp forward to the domain's current
+ * complete and store the complete that is now held.
+ */
+static inline
+void __rcu_qsctr_inc(struct rcu_domain *domain, struct rcu_data *rdp)
+{
+	long held = rdp->locked_complete;
+
+	rdp->locked_complete = rcuring_advance_lock(&domain->rcuring, held);
+}
+
+/*
+ * Record in rcu_task->flags that the task holds a reference on the ring
+ * slot of @locked_complete: the slot index is stored together with
+ * RCURING_PREEMPT_FLAGS in a single store.
+ */
+static inline void rcu_preempt_save_complete(struct rcu_task_preempt *rcu_task,
+ long locked_complete)
+{
+ unsigned int new_flags;
+
+ new_flags = RCURING_PREEMPT_FLAGS | RCURING_IDX(locked_complete);
+ ACCESS_ONCE(rcu_task->flags) = new_flags;
+}
+
+/*
+ * Every CPU holds a locked complete. With preemptible RCU a task may hold
+ * one as well. This function first settles the locked complete of the
+ * current task (duplicate-and-save, release, or advance-and-save), and then
+ * advances the locked complete of the current CPU.
+ *
+ * The task's slot index is kept in the low bits of rcu_task->flags, which
+ * is why flags can be handed to rcuring_unlock() directly: only
+ * RCURING_IDX() of the value is used there.
+ */
+static inline
+void __rcu_preempt_qsctr_inc(struct rcu_domain *domain, struct rcu_data *rdp,
+ struct rcu_task_preempt *rcu_task)
+{
+ long locked_complete = rdp->locked_complete;
+ unsigned int flags = ACCESS_ONCE(rcu_task->flags);
+
+ if (!(flags & RCURING_PREEMPT_SAVED)) {
+ BUG_ON(flags & RCURING_PREEMPT_QS);
+ if (rcu_task->nesting) {
+ /* dup_lock_and_save current task's lock_complete */
+ rcuring_dup_lock(&domain->rcuring, locked_complete);
+ rcu_preempt_save_complete(rcu_task, locked_complete);
+ }
+ } else if (!(rcu_task->nesting)) {
+ /* release current task's lock_complete */
+ rcuring_unlock(&domain->rcuring, (long)flags);
+ ACCESS_ONCE(rcu_task->flags) = 0;
+ } else if (!(flags & RCURING_PREEMPT_QS)) {
+ /* advance_and_save current task's lock_complete */
+ if (RCURING_IDX(locked_complete) != RCURING_IDX((long)flags)) {
+ rcuring_unlock(&domain->rcuring, (long)flags);
+ rcuring_dup_lock(&domain->rcuring, locked_complete);
+ }
+ rcu_preempt_save_complete(rcu_task, locked_complete);
+ }
+
+ /* advance the locked complete of the current cpu */
+ rdp->locked_complete = rcuring_advance_lock(&domain->rcuring,
+ locked_complete);
+}
+
+/*
+ * Block until a callback queued now on @domain has been invoked.
+ *
+ * An on-stack callback that completes rcu.completion is queued with
+ * interrupts disabled, and the caller then sleeps on that completion.
+ * With @expedited set, a new complete is started right away and a
+ * quiescent state is forced before sleeping.
+ */
+static void __synchronize_rcu(struct rcu_domain *domain, int expedited)
+{
+ struct rcu_synchronize rcu;
+ long complete;
+
+ init_completion(&rcu.completion);
+
+ local_irq_disable();
+ complete = __call_rcu(domain, &rcu.head, wakeme_after_rcu);
+
+ if (expedited) {
+ /*
+ * Force a new gp to be started immediately (it may use a
+ * reserved gp). But if all gps are used, starting a new gp
+ * fails, and we have to wait until some gp becomes available.
+ */
+ rcuring_advance_complete_force(&domain->rcuring,
+ complete + 1);
+
+ /* Force a qs and expedite the new gp */
+ force_quiescent_state(domain, complete);
+ }
+ local_irq_enable();
+
+ wait_for_completion(&rcu.completion);
+}
+
+/* Returns the current done_complete of @domain's ring. */
+static inline long __rcu_batches_completed(struct rcu_domain *domain)
+{
+ return rcuring_get_done_complete(&domain->rcuring);
+}
+
+/*
+ * Code generation helpers.
+ *
+ * GEN_CODES(gen_code, args...) expands gen_code(<domain>, args...) once for
+ * each configured domain, in the order bh, sched, preempt. The sched
+ * expansion is unconditional; bh and preempt expand to nothing unless
+ * CONFIG_RCURING_BH / CONFIG_RCURING_PREEMPT are set.
+ */
+#ifdef CONFIG_RCURING_BH
+#define gen_code_for_bh(gen_code, args...) gen_code(bh, ##args)
+#else
+#define gen_code_for_bh(gen_code, args...)
+#endif
+#define gen_code_for_sched(gen_code, args...) gen_code(sched, ##args)
+#ifdef CONFIG_RCURING_PREEMPT
+#define gen_code_for_preempt(gen_code, args...) gen_code(preempt, ##args)
+#else
+#define gen_code_for_preempt(gen_code, args...)
+#endif
+
+#define GEN_CODES(gen_code, args...) \
+ gen_code_for_bh(gen_code, ##args) \
+ gen_code_for_sched(gen_code, ##args) \
+ gen_code_for_preempt(gen_code, ##args) \
+
+/*
+ * gen_basic_code(domain) emits, for one domain:
+ * - a forward declaration of force_quiescent_state_<domain>(),
+ * - the per-CPU rcu_data_<domain> and the rcu_domain_<domain> object,
+ * - the exported entry points call_rcu_<domain>(),
+ * synchronize_rcu_<domain>(), synchronize_rcu_<domain>_expedited(),
+ * rcu_batches_completed_<domain>() and
+ * rcu_<domain>_force_quiescent_state().
+ *
+ * NOTE(review): rcu_<domain>_force_quiescent_state() passes 0 as the target
+ * complete, while completes start at RCURING_INIT_COMPLETE - confirm that 0
+ * is the intended argument.
+ */
+#define gen_basic_code(domain) \
+ \
+static void force_quiescent_state_##domain(struct rcu_domain *domain, \
+ long fqs_complete); \
+ \
+static DEFINE_PER_CPU(struct rcu_data, rcu_data_##domain); \
+ \
+struct rcu_domain rcu_domain_##domain = \
+{ \
+ .rcuring = RCURING_INIT(rcu_domain_##domain.rcuring), \
+ .domain_name = #domain, \
+ .rcu_data = &rcu_data_##domain, \
+ .lock = __SPIN_LOCK_UNLOCKED(rcu_domain_##domain.lock), \
+ .force_quiescent_state = force_quiescent_state_##domain, \
+}; \
+ \
+void call_rcu_##domain(struct rcu_head *head, \
+ void (*func)(struct rcu_head *)) \
+{ \
+ unsigned long flags; \
+ \
+ local_irq_save(flags); \
+ __call_rcu(&rcu_domain_##domain, head, func); \
+ local_irq_restore(flags); \
+} \
+EXPORT_SYMBOL_GPL(call_rcu_##domain); \
+ \
+void synchronize_rcu_##domain(void) \
+{ \
+ __synchronize_rcu(&rcu_domain_##domain, 0); \
+} \
+EXPORT_SYMBOL_GPL(synchronize_rcu_##domain); \
+ \
+void synchronize_rcu_##domain##_expedited(void) \
+{ \
+ __synchronize_rcu(&rcu_domain_##domain, 1); \
+} \
+EXPORT_SYMBOL_GPL(synchronize_rcu_##domain##_expedited); \
+ \
+long rcu_batches_completed_##domain(void) \
+{ \
+ return __rcu_batches_completed(&rcu_domain_##domain); \
+} \
+EXPORT_SYMBOL_GPL(rcu_batches_completed_##domain); \
+ \
+void rcu_##domain##_force_quiescent_state(void) \
+{ \
+ force_quiescent_state(&rcu_domain_##domain, 0); \
+} \
+EXPORT_SYMBOL_GPL(rcu_##domain##_force_quiescent_state);
+
+GEN_CODES(gen_basic_code)
+
+/*
+ * Per-CPU nesting count: set to 1 by __rcu_kernel_enter_outmost(), to 0 by
+ * __rcu_kernel_exit_outmost(), and incremented/decremented by
+ * rcu_kernel_enter()/rcu_kernel_exit().
+ */
+static DEFINE_PER_CPU(int, kernel_count);
+
+/*
+ * Per-domain statement generators for GEN_CODES(). They operate on a local
+ * variable complete_<domain>, and SAVE/LOAD expect a variable named cpu to
+ * be in scope.
+ *
+ * LOCK_COMPLETE:    take a ring reference, remember the complete locked.
+ * SAVE_COMPLETE:    store it in the CPU's rcu_data.
+ * LOAD_COMPLETE:    fetch it back from the CPU's rcu_data.
+ * RELEASE_COMPLETE: drop the ring reference.
+ *
+ * The local is a long in both LOCK_COMPLETE and LOAD_COMPLETE, matching
+ * the type of rcu_data.locked_complete.
+ */
+#define LOCK_COMPLETE(domain) \
+	long complete_##domain = rcuring_lock(&rcu_domain_##domain.rcuring);
+#define SAVE_COMPLETE(domain) \
+	per_cpu(rcu_data_##domain, cpu).locked_complete = complete_##domain;
+#define LOAD_COMPLETE(domain) \
+	long complete_##domain = per_cpu(rcu_data_##domain, cpu).locked_complete;
+#define RELEASE_COMPLETE(domain) \
+	rcuring_unlock(&rcu_domain_##domain.rcuring, complete_##domain);
+
+/*
+ * Outermost entry for @cpu: take a ring reference in every configured
+ * domain, set the CPU's kernel_count to 1, then record the locked completes
+ * in the CPU's rcu_data. Compiler barriers keep the kernel_count store
+ * between the two groups of statements.
+ */
+static void __rcu_kernel_enter_outmost(int cpu)
+{
+ GEN_CODES(LOCK_COMPLETE)
+
+ barrier();
+ per_cpu(kernel_count, cpu) = 1;
+ barrier();
+
+ GEN_CODES(SAVE_COMPLETE)
+}
+
+/*
+ * Outermost exit for @cpu: read the locked complete of every configured
+ * domain from the CPU's rcu_data, set the CPU's kernel_count to 0, then
+ * drop the ring references. Compiler barriers keep the kernel_count store
+ * between the two groups of statements.
+ */
+static void __rcu_kernel_exit_outmost(int cpu)
+{
+ GEN_CODES(LOAD_COMPLETE)
+
+ barrier();
+ per_cpu(kernel_count, cpu) = 0;
+ barrier();
+
+ GEN_CODES(RELEASE_COMPLETE)
+}
+
+#ifdef CONFIG_RCURING_BH
+/* force_quiescent_state hook of the bh domain: uses the generic code. */
+static void force_quiescent_state_bh(struct rcu_domain *domain,
+ long fqs_complete)
+{
+ __force_quiescent_state(domain, fqs_complete);
+}
+
+/*
+ * Advance the bh domain's locked complete of @cpu. The callers in this
+ * file invoke it with interrupts disabled.
+ */
+static inline void rcu_bh_qsctr_inc_irqoff(int cpu)
+{
+ __rcu_qsctr_inc(&rcu_domain_bh, &per_cpu(rcu_data_bh, cpu));
+}
+
+/* Same as rcu_bh_qsctr_inc_irqoff(), but disables interrupts itself. */
+void rcu_bh_qs(int cpu)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ rcu_bh_qsctr_inc_irqoff(cpu);
+ local_irq_restore(flags);
+}
+
+#else /* CONFIG_RCURING_BH */
+/* No bh domain configured: nothing to do. */
+static inline void rcu_bh_qsctr_inc_irqoff(int cpu) { (void)(cpu); }
+#endif /* CONFIG_RCURING_BH */
+
+/* force_quiescent_state hook of the sched domain: uses the generic code. */
+static void force_quiescent_state_sched(struct rcu_domain *domain,
+ long fqs_complete)
+{
+ __force_quiescent_state(domain, fqs_complete);
+}
+
+/*
+ * Advance the sched domain's locked complete of @cpu. The callers in this
+ * file invoke it with interrupts disabled.
+ */
+static inline void rcu_sched_qsctr_inc_irqoff(int cpu)
+{
+ __rcu_qsctr_inc(&rcu_domain_sched, &per_cpu(rcu_data_sched, cpu));
+}
+
+#ifdef CONFIG_RCURING_PREEMPT
+/*
+ * force_quiescent_state hook of the preempt domain. Not implemented yet,
+ * so forcing a quiescent state does nothing for this domain.
+ */
+static void force_quiescent_state_preempt(struct rcu_domain *domain,
+		long fqs_complete)
+{
+	/* To Be Implemented */
+}
+
+/*
+ * Run __rcu_preempt_qsctr_inc() for @cpu's rcu_data of the preempt domain
+ * and for the preempt-RCU state of the current task. The callers in this
+ * file invoke it with interrupts disabled.
+ */
+static inline void rcu_preempt_qsctr_inc_irqoff(int cpu)
+{
+	__rcu_preempt_qsctr_inc(&rcu_domain_preempt,
+				&per_cpu(rcu_data_preempt, cpu),
+				&current->rcu_task_preempt);
+}
+
+#else /* CONFIG_RCURING_PREEMPT */
+/*
+ * No preempt domain configured: nothing to do. The casts to void only
+ * reference the parameter and __rcu_preempt_qsctr_inc.
+ */
+static inline void rcu_preempt_qsctr_inc_irqoff(int cpu)
+{
+	(void)(cpu);
+	(void)(__rcu_preempt_qsctr_inc);
+}
+#endif /* CONFIG_RCURING_PREEMPT */
+
+/* Per-domain check used by rcu_needs_cpu(): return 1 if callbacks are queued. */
+#define gen_needs_cpu(domain, cpu) \
+ if (__rcu_needs_cpu(&per_cpu(rcu_data_##domain, cpu))) \
+ return 1;
+/*
+ * Returns 1 when @cpu still has callbacks queued in any configured domain,
+ * 0 otherwise.
+ */
+int rcu_needs_cpu(int cpu)
+{
+ GEN_CODES(gen_needs_cpu, cpu)
+ return 0;
+}
+
+/*
+ * Per-domain call generator for GEN_CODES(): invokes @func with the domain
+ * object, this CPU's rcu_data of that domain, and any extra arguments.
+ */
+#define call_func(domain, func, args...) \
+ func(&rcu_domain_##domain, &__get_cpu_var(rcu_data_##domain), ##args);
+
+/*
+ * Note a context switch. This is a quiescent state for RCU-sched,
+ * and requires special handling for preemptible RCU.
+ *
+ * With interrupts disabled, the locked completes of @cpu are advanced in
+ * every configured domain.
+ */
+void rcu_note_context_switch(int cpu)
+{
+ unsigned long flags;
+
+#ifdef CONFIG_HOTPLUG_CPU
+ /*
+ * The stopper thread needs to schedule() to the idle thread
+ * after CPU_DYING, when kernel_count has already been cleared.
+ */
+ if (unlikely(!per_cpu(kernel_count, cpu))) {
+ BUG_ON(cpu_online(cpu));
+ return;
+ }
+#endif
+
+ local_irq_save(flags);
+ rcu_bh_qsctr_inc_irqoff(cpu);
+ rcu_sched_qsctr_inc_irqoff(cpu);
+ rcu_preempt_qsctr_inc_irqoff(cpu);
+ local_irq_restore(flags);
+}
+
+/*
+ * Non-zero when @cpu is idle, rcu_scheduler_active is set, the caller is
+ * not in softirq context and is nested in at most one level of hardirq.
+ */
+static int rcu_cpu_idle(int cpu)
+{
+ return idle_cpu(cpu) && rcu_scheduler_active &&
+ !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT);
+}
+
+/*
+ * Periodic check for @cpu.
+ *
+ * When @user is set or the CPU is idle, the locked completes of all
+ * domains are advanced; otherwise only the bh one is, and only when not in
+ * softirq context. Afterwards __rcu_check_callbacks() runs for every
+ * configured domain. Everything happens with interrupts disabled.
+ */
+void rcu_check_callbacks(int cpu, int user)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ if (user || rcu_cpu_idle(cpu)) {
+ rcu_bh_qsctr_inc_irqoff(cpu);
+ rcu_sched_qsctr_inc_irqoff(cpu);
+ rcu_preempt_qsctr_inc_irqoff(cpu);
+ } else if (!in_softirq())
+ rcu_bh_qsctr_inc_irqoff(cpu);
+
+ GEN_CODES(call_func, __rcu_check_callbacks, 0)
+ local_irq_restore(flags);
+}
+
+/*
+ * RCU_SOFTIRQ handler: for every configured domain, first refresh the
+ * callback queues of this CPU, then invoke the callbacks that are done.
+ * rcu_do_batch() re-enables interrupts while the callbacks run.
+ */
+static void rcu_process_callbacks(struct softirq_action *unused)
+{
+ local_irq_disable();
+ GEN_CODES(call_func, __rcu_check_callbacks, 1)
+ GEN_CODES(call_func, rcu_do_batch)
+ local_irq_enable();
+}
+
+/*
+ * Hand all callbacks of the outgoing CPU's @off_rdp over to @rdp.
+ *
+ * The callbacks are first collected on @off_rdp's done queue and then
+ * appended to @rdp's batch for the current complete, so they wait for a
+ * grace period again on the receiving CPU. Does nothing when @off_rdp has
+ * no callbacks queued.
+ */
+static void __cpuinit __rcu_offline_cpu(struct rcu_domain *domain,
+ struct rcu_data *off_rdp, struct rcu_data *rdp)
+{
+ if (off_rdp->qlen > 0) {
+ unsigned long flags;
+ long curr_complete;
+
+ /* move all callbacks in @off_rdp to @off_rdp->done_batch */
+ do_advance_callbacks(off_rdp, off_rdp->curr_complete);
+
+ /* move all callbacks in @off_rdp to this cpu(@rdp) */
+ local_irq_save(flags);
+ curr_complete = prepare_for_new_callback(domain, rdp);
+ rcu_batch_merge(rdp->batch + RCURING_IDX(curr_complete),
+ &off_rdp->done_batch);
+ rdp->qlen += off_rdp->qlen;
+ local_irq_restore(flags);
+
+ off_rdp->qlen = 0;
+ }
+}
+
+/*
+ * Per-domain call generator for GEN_CODES(): move the callbacks of @cpu to
+ * @recieve_cpu in the given domain.
+ */
+#define gen_offline(domain, cpu, recieve_cpu) \
+ __rcu_offline_cpu(&rcu_domain_##domain, \
+ &per_cpu(rcu_data_##domain, cpu), \
+ &per_cpu(rcu_data_##domain, recieve_cpu));
+
+/*
+ * CPU hotplug notifier.
+ *
+ * CPU_DYING: hand the callbacks of the dying CPU to another online CPU and
+ * drop its ring references. CPU_STARTING: take ring references for the new
+ * CPU. Every other action is ignored. Always returns NOTIFY_OK.
+ */
+static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *hcpu)
+{
+ int cpu = (long)hcpu;
+ int recieve_cpu;
+
+ /* reference these so they do not look unused without CONFIG_HOTPLUG_CPU */
+ (void)(recieve_cpu);
+ (void)(__rcu_offline_cpu);
+ (void)(__rcu_kernel_exit_outmost);
+
+ switch (action) {
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DYING:
+ case CPU_DYING_FROZEN:
+ /*
+ * The machine is stopped now, so we can access
+ * any other cpu's data.
+ *
+ * NOTE(review): the result of cpumask_any_but() is used without
+ * a range check; this presumably relies on another CPU being
+ * online at this point - confirm.
+ */
+ recieve_cpu = cpumask_any_but(cpu_online_mask, cpu);
+ GEN_CODES(gen_offline, cpu, recieve_cpu)
+ __rcu_kernel_exit_outmost(cpu);
+ break;
+#endif
+ case CPU_STARTING:
+ case CPU_STARTING_FROZEN:
+ __rcu_kernel_enter_outmost(cpu);
+ /* fall through: default only breaks */
+ default:
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+/* Per-domain generator: initialize the rcu_data of every possible CPU. */
+#define rcu_init_domain_data(domain, cpu) \
+ for_each_possible_cpu(cpu) \
+ rcu_data_init(&per_cpu(rcu_data_##domain, cpu));
+
+/*
+ * Boot-time initialization: set up the callback queues of all possible
+ * CPUs in every configured domain, take the ring references for the
+ * booting CPU, register the softirq handler and the hotplug notifier.
+ */
+void __init rcu_init(void)
+{
+ int cpu;
+
+ GEN_CODES(rcu_init_domain_data, cpu)
+
+ __rcu_kernel_enter_outmost(raw_smp_processor_id());
+ open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
+ cpu_notifier(rcu_cpu_notify, 0);
+}
+
+/* Set once by rcu_scheduler_starting(); read by rcu_cpu_idle(). */
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+
+/*
+ * This function is invoked towards the end of the scheduler's initialization
+ * process. Before this is called, the idle task might contain
+ * RCU read-side critical sections (during which time, this idle
+ * task is booting the system). After this function is called, the
+ * idle tasks are prohibited from containing RCU read-side critical
+ * sections. This function also enables RCU lockdep checking.
+ *
+ * NOTE(review): in this file the function only sets rcu_scheduler_active;
+ * the lockdep effect presumably comes from code elsewhere that tests the
+ * flag - confirm.
+ */
+void rcu_scheduler_starting(void)
+{
+ rcu_scheduler_active = 1;
+}
+
+#ifdef CONFIG_NO_HZ
+
+/*
+ * Nested entry: the outermost call (kernel_count == 0) takes the ring
+ * references for this CPU, every further call only bumps the nesting
+ * count. Runs with interrupts disabled.
+ */
+void rcu_kernel_enter(void)
+{
+	unsigned long flags;
+	int *nesting;
+
+	raw_local_irq_save(flags);
+	nesting = &__get_cpu_var(kernel_count);
+	if (*nesting != 0)
+		(*nesting)++;
+	else
+		__rcu_kernel_enter_outmost(raw_smp_processor_id());
+	raw_local_irq_restore(flags);
+}
+
+/*
+ * Nested exit: the outermost call (kernel_count == 1) drops the ring
+ * references of this CPU, every other call only lowers the nesting count.
+ * Runs with interrupts disabled.
+ */
+void rcu_kernel_exit(void)
+{
+	unsigned long flags;
+	int *nesting;
+
+	raw_local_irq_save(flags);
+	nesting = &__get_cpu_var(kernel_count);
+	if (*nesting != 1)
+		(*nesting)--;
+	else
+		__rcu_kernel_exit_outmost(raw_smp_processor_id());
+	raw_local_irq_restore(flags);
+}
+
+/*
+ * Drop this CPU's ring references and clear its kernel_count, with
+ * interrupts disabled. The nesting count is not consulted.
+ */
+void rcu_enter_nohz(void)
+{
+ unsigned long flags;
+
+ raw_local_irq_save(flags);
+ __rcu_kernel_exit_outmost(raw_smp_processor_id());
+ raw_local_irq_restore(flags);
+}
+
+/*
+ * Take this CPU's ring references again and set its kernel_count to 1,
+ * with interrupts disabled. The nesting count is not consulted.
+ */
+void rcu_exit_nohz(void)
+{
+ unsigned long flags;
+
+ raw_local_irq_save(flags);
+ __rcu_kernel_enter_outmost(raw_smp_processor_id());
+ raw_local_irq_restore(flags);
+}
+#endif /* CONFIG_NO_HZ */
+
+/*
+ * State shared by all rcu_barrier_<domain>() calls; __rcu_barrier() holds
+ * rcu_barrier_mutex while it uses them.
+ */
+static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head);
+static struct completion rcu_barrier_completion;
+static struct kref rcu_barrier_wait_ref;
+static DEFINE_MUTEX(rcu_barrier_mutex);
+
+/* kref release function: the last reference is gone, wake the waiter. */
+static void rcu_barrier_release_ref(struct kref *notused)
+{
+ complete(&rcu_barrier_completion);
+}
+
+/* RCU callback queued on each CPU: drop that CPU's reference. */
+static void rcu_barrier_callback(struct rcu_head *notused)
+{
+ kref_put(&rcu_barrier_wait_ref, rcu_barrier_release_ref);
+}
+
+/*
+ * Runs on every CPU via on_each_cpu(): take one reference on the barrier
+ * kref and queue this CPU's rcu_barrier_head on the domain passed in @data.
+ */
+static void rcu_barrier_func(void *data)
+{
+ struct rcu_domain *rcu_domain = data;
+
+ kref_get(&rcu_barrier_wait_ref);
+ __call_rcu(rcu_domain, &__get_cpu_var(rcu_barrier_head),
+ rcu_barrier_callback);
+}
+
+/*
+ * Wait until a callback queued now on every CPU of @rcu_domain has been
+ * invoked. Calls are serialized by rcu_barrier_mutex.
+ */
+static void __rcu_barrier(struct rcu_domain *rcu_domain)
+{
+ mutex_lock(&rcu_barrier_mutex);
+
+ init_completion(&rcu_barrier_completion);
+ /* initial reference, dropped below once all heads are queued */
+ kref_init(&rcu_barrier_wait_ref);
+
+ /* queue barrier rcu_heads for every cpu */
+ on_each_cpu(rcu_barrier_func, rcu_domain, 1);
+
+ kref_put(&rcu_barrier_wait_ref, rcu_barrier_release_ref);
+ wait_for_completion(&rcu_barrier_completion);
+
+ mutex_unlock(&rcu_barrier_mutex);
+}
+
+/* Emit the exported rcu_barrier_<domain>() wrapper for one domain. */
+#define gen_rcu_barrier(domain) \
+void rcu_barrier_##domain(void) \
+{ \
+ __rcu_barrier(&rcu_domain_##domain); \
+} \
+EXPORT_SYMBOL_GPL(rcu_barrier_##domain);
+
+GEN_CODES(gen_rcu_barrier)
+
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 196ec02..d806735 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -59,6 +59,14 @@ int rcu_scheduler_active __read_mostly;
EXPORT_SYMBOL_GPL(rcu_scheduler_active);
#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+/* Forward declarations for rcutiny_plugin.h. */
+static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
+static void __call_rcu(struct rcu_head *head,
+ void (*func)(struct rcu_head *rcu),
+ struct rcu_ctrlblk *rcp);
+
+#include "rcutiny_plugin.h"
+
#ifdef CONFIG_NO_HZ
static long rcu_dynticks_nesting = 1;
@@ -140,6 +148,7 @@ void rcu_check_callbacks(int cpu, int user)
rcu_sched_qs(cpu);
else if (!in_softirq())
rcu_bh_qs(cpu);
+ rcu_preempt_check_callbacks();
}
/*
@@ -162,6 +171,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
*rcp->donetail = NULL;
if (rcp->curtail == rcp->donetail)
rcp->curtail = &rcp->rcucblist;
+ rcu_preempt_remove_callbacks(rcp);
rcp->donetail = &rcp->rcucblist;
local_irq_restore(flags);
@@ -182,6 +192,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
{
__rcu_process_callbacks(&rcu_sched_ctrlblk);
__rcu_process_callbacks(&rcu_bh_ctrlblk);
+ rcu_preempt_process_callbacks();
}
/*
@@ -223,15 +234,15 @@ static void __call_rcu(struct rcu_head *head,
}
/*
- * Post an RCU callback to be invoked after the end of an RCU grace
+ * Post an RCU callback to be invoked after the end of an RCU-sched grace
* period. But since we have but one CPU, that would be after any
* quiescent state.
*/
-void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
__call_rcu(head, func, &rcu_sched_ctrlblk);
}
-EXPORT_SYMBOL_GPL(call_rcu);
+EXPORT_SYMBOL_GPL(call_rcu_sched);
/*
* Post an RCU bottom-half callback to be invoked after any subsequent
@@ -243,20 +254,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
-void rcu_barrier(void)
-{
- struct rcu_synchronize rcu;
-
- init_rcu_head_on_stack(&rcu.head);
- init_completion(&rcu.completion);
- /* Will wake me after RCU finished. */
- call_rcu(&rcu.head, wakeme_after_rcu);
- /* Wait for it. */
- wait_for_completion(&rcu.completion);
- destroy_rcu_head_on_stack(&rcu.head);
-}
-EXPORT_SYMBOL_GPL(rcu_barrier);
-
void rcu_barrier_bh(void)
{
struct rcu_synchronize rcu;
@@ -289,5 +286,3 @@ void __init rcu_init(void)
{
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
}
-
-#include "rcutiny_plugin.h"
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index d223a92..6ceca4f 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -1,7 +1,7 @@
/*
- * Read-Copy Update mechanism for mutual exclusion (tree-based version)
+ * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition
* Internal non-public definitions that provide either classic
- * or preemptable semantics.
+ * or preemptible semantics.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -17,11 +17,587 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
- * Copyright IBM Corporation, 2009
+ * Copyright (c) 2010 Linaro
*
* Author: Paul E. McKenney
*/
+#ifdef CONFIG_TINY_PREEMPT_RCU
+
+#include
+
+/* Global control variables for preemptible RCU. */
+struct rcu_preempt_ctrlblk {
+ struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */
+ struct rcu_head **nexttail;
+ /* Tasks blocked in a preemptible RCU */
+ /* read-side critical section while a */
+ /* preemptible-RCU grace period is in */
+ /* progress must wait for a later grace */
+ /* period. This pointer points to the */
+ /* ->next pointer of the last task that */
+ /* must wait for a later grace period, or */
+ /* to &->rcb.rcucblist if there is no */
+ /* such task. */
+ struct list_head blkd_tasks;
+ /* Tasks blocked in RCU read-side critical */
+ /* section. Tasks are placed at the head */
+ /* of this list and age towards the tail. */
+ struct list_head *gp_tasks;
+ /* Pointer to the first task blocking the */
+ /* current grace period, or NULL if there */
+ /* is no such task. */
+ struct list_head *exp_tasks;
+ /* Pointer to first task blocking the */
+ /* current expedited grace period, or NULL */
+ /* if there is no such task. If there */
+ /* is no current expedited grace period, */
+ /* then there cannot be any such task. */
+ u8 gpnum; /* Current grace period. */
+ u8 gpcpu; /* Last grace period blocked by the CPU. */
+ u8 completed; /* Last grace period completed. */
+ /* If all three are equal, RCU is idle. */
+};
+
+static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
+ .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist,
+ .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
+ .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
+ .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
+};
+
+static int rcu_preempted_readers_exp(void);
+static void rcu_report_exp_done(void);
+
+/*
+ * Return true if the CPU has not yet responded to the current grace period.
+ */
+static int rcu_cpu_blocking_cur_gp(void)
+{
+ return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum;
+}
+
+/*
+ * Check for a running RCU reader. Because there is only one CPU,
+ * there can be but one running RCU reader at a time. ;-)
+ */
+static int rcu_preempt_running_reader(void)
+{
+ return current->rcu_read_lock_nesting;
+}
+
+/*
+ * Check for preempted RCU readers blocking any grace period.
+ * If the caller needs a reliable answer, it must disable hard irqs.
+ */
+static int rcu_preempt_blocked_readers_any(void)
+{
+ return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks);
+}
+
+/*
+ * Check for preempted RCU readers blocking the current grace period.
+ * If the caller needs a reliable answer, it must disable hard irqs.
+ */
+static int rcu_preempt_blocked_readers_cgp(void)
+{
+ return rcu_preempt_ctrlblk.gp_tasks != NULL;
+}
+
+/*
+ * Return true if another preemptible-RCU grace period is needed.
+ */
+static int rcu_preempt_needs_another_gp(void)
+{
+ return *rcu_preempt_ctrlblk.rcb.curtail != NULL;
+}
+
+/*
+ * Return true if a preemptible-RCU grace period is in progress.
+ * The caller must disable hardirqs.
+ */
+static int rcu_preempt_gp_in_progress(void)
+{
+ return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum;
+}
+
+/*
+ * Record a preemptible-RCU quiescent state for the specified CPU. Note
+ * that this just means that the task currently running on the CPU is
+ * in a quiescent state. There might be any number of tasks blocked
+ * while in an RCU read-side critical section.
+ *
+ * Unlike the other rcu_*_qs() functions, callers to this function
+ * must disable irqs in order to protect the assignment to
+ * ->rcu_read_unlock_special.
+ *
+ * Because this is a single-CPU implementation, the only way a grace
+ * period can end is if the CPU is in a quiescent state. The reason is
+ * that a blocked preemptible-RCU reader can exit its critical section
+ * only if the CPU is running it at the time. Therefore, when the
+ * last task blocking the current grace period exits its RCU read-side
+ * critical section, neither the CPU nor blocked tasks will be stopping
+ * the current grace period. (In contrast, SMP implementations
+ * might have CPUs running in RCU read-side critical sections that
+ * block later grace periods -- but this is not possible given only
+ * one CPU.)
+ */
+static void rcu_preempt_cpu_qs(void)
+{
+ /* Record both CPU and task as having responded to current GP. */
+ rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum;
+ current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+
+ /*
+ * If there is no GP, or if blocked readers are still blocking GP,
+ * then there is nothing more to do.
+ */
+ if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp())
+ return;
+
+ /* Advance callbacks. */
+ rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum;
+ rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail;
+ rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail;
+
+ /* If there are no blocked readers, next GP is done instantly. */
+ if (!rcu_preempt_blocked_readers_any())
+ rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail;
+
+ /* If there are done callbacks, make RCU_SOFTIRQ process them. */
+ if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
+ raise_softirq(RCU_SOFTIRQ);
+}
+
+/*
+ * Start a new RCU grace period if warranted. Hard irqs must be disabled.
+ */
+static void rcu_preempt_start_gp(void)
+{
+ if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) {
+
+ /* Official start of GP. */
+ rcu_preempt_ctrlblk.gpnum++;
+
+ /* Any blocked RCU readers block new GP. */
+ if (rcu_preempt_blocked_readers_any())
+ rcu_preempt_ctrlblk.gp_tasks =
+ rcu_preempt_ctrlblk.blkd_tasks.next;
+
+ /* If there is no running reader, CPU is done with GP. */
+ if (!rcu_preempt_running_reader())
+ rcu_preempt_cpu_qs();
+ }
+}
+
+/*
+ * We have entered the scheduler, and the current task might soon be
+ * context-switched away from. If this task is in an RCU read-side
+ * critical section, we will no longer be able to rely on the CPU to
+ * record that fact, so we enqueue the task on the blkd_tasks list.
+ * If the task started after the current grace period began, as recorded
+ * by ->gpcpu, we enqueue at the beginning of the list. Otherwise, we
+ * enqueue before the element referenced by ->gp_tasks (or at the tail if
+ * ->gp_tasks is NULL) and point ->gp_tasks at the newly added element.
+ * The task will dequeue itself when it exits the outermost enclosing
+ * RCU read-side critical section. Therefore, the current grace period
+ * cannot be permitted to complete until the ->gp_tasks pointer becomes
+ * NULL.
+ *
+ * Caller must disable preemption.
+ */
+void rcu_preempt_note_context_switch(void)
+{
+ struct task_struct *t = current;
+ unsigned long flags;
+
+ local_irq_save(flags); /* must exclude scheduler_tick(). */
+ if (rcu_preempt_running_reader() &&
+ (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
+
+ /* Possibly blocking in an RCU read-side critical section. */
+ t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
+
+ /*
+ * If this CPU has already checked in, then this task
+ * will hold up the next grace period rather than the
+ * current grace period. Queue the task accordingly.
+ * If the task is queued for the current grace period
+ * (i.e., this CPU has not yet passed through a quiescent
+ * state for the current grace period), then as long
+ * as that task remains queued, the current grace period
+ * cannot end.
+ */
+ list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks);
+ if (rcu_cpu_blocking_cur_gp())
+ rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry;
+ }
+
+ /*
+ * Either we were not in an RCU read-side critical section to
+ * begin with, or we have now recorded that critical section
+ * globally. Either way, we can now note a quiescent state
+ * for this CPU. Again, if we were in an RCU read-side critical
+ * section, and if that critical section was blocking the current
+ * grace period, then the fact that the task has been enqueued
+ * means that current grace period continues to be blocked.
+ */
+ rcu_preempt_cpu_qs();
+ local_irq_restore(flags);
+}
+
+/*
+ * Tiny-preemptible RCU implementation for rcu_read_lock().
+ * Just increment ->rcu_read_lock_nesting, shared state will be updated
+ * if we block.
+ */
+void __rcu_read_lock(void)
+{
+ current->rcu_read_lock_nesting++;
+ barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_lock);
+
+/*
+ * Handle special cases during rcu_read_unlock(), such as needing to
+ * notify RCU core processing or task having blocked during the RCU
+ * read-side critical section.
+ */
+static void rcu_read_unlock_special(struct task_struct *t)
+{
+ int empty;
+ int empty_exp;
+ unsigned long flags;
+ struct list_head *np;
+ int special;
+
+ /*
+ * NMI handlers cannot block and cannot safely manipulate state.
+ * They therefore cannot possibly be special, so just leave.
+ */
+ if (in_nmi())
+ return;
+
+ local_irq_save(flags);
+
+ /*
+ * If RCU core is waiting for this CPU to exit critical section,
+ * let it know that we have done so.
+ */
+ special = t->rcu_read_unlock_special;
+ if (special & RCU_READ_UNLOCK_NEED_QS)
+ rcu_preempt_cpu_qs();
+
+ /* Hardware IRQ handlers cannot block. */
+ if (in_irq()) {
+ local_irq_restore(flags);
+ return;
+ }
+
+ /* Clean up if blocked during RCU read-side critical section. */
+ if (special & RCU_READ_UNLOCK_BLOCKED) {
+ t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
+
+ /*
+ * Remove this task from the ->blkd_tasks list and adjust
+ * any pointers that might have been referencing it.
+ */
+ empty = !rcu_preempt_blocked_readers_cgp();
+ empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL;
+ np = t->rcu_node_entry.next;
+ if (np == &rcu_preempt_ctrlblk.blkd_tasks)
+ np = NULL;
+ list_del(&t->rcu_node_entry);
+ if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks)
+ rcu_preempt_ctrlblk.gp_tasks = np;
+ if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks)
+ rcu_preempt_ctrlblk.exp_tasks = np;
+ INIT_LIST_HEAD(&t->rcu_node_entry);
+
+ /*
+ * If this was the last task on the current list, and if
+ * we aren't waiting on the CPU, report the quiescent state
+ * and start a new grace period if needed.
+ */
+ if (!empty && !rcu_preempt_blocked_readers_cgp()) {
+ rcu_preempt_cpu_qs();
+ rcu_preempt_start_gp();
+ }
+
+ /*
+ * If this was the last task on the expedited lists,
+ * then we need to wake up the waiting task.
+ */
+ if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL)
+ rcu_report_exp_done();
+ }
+ local_irq_restore(flags);
+}
+
+/*
+ * Tiny-preemptible RCU implementation for rcu_read_unlock().
+ * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
+ * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
+ * invoke rcu_read_unlock_special() to clean up after a context switch
+ * in an RCU read-side critical section and other special cases.
+ */
+void __rcu_read_unlock(void)
+{
+ struct task_struct *t = current;
+
+ barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */
+ --t->rcu_read_lock_nesting;
+ barrier(); /* decrement before load of ->rcu_read_unlock_special */
+ if (t->rcu_read_lock_nesting == 0 &&
+ unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+ rcu_read_unlock_special(t);
+#ifdef CONFIG_PROVE_LOCKING
+ WARN_ON_ONCE(t->rcu_read_lock_nesting < 0);
+#endif /* #ifdef CONFIG_PROVE_LOCKING */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_unlock);
+
+/*
+ * Check for a quiescent state from the current CPU. When a task blocks,
+ * the task is recorded in the rcu_preempt_ctrlblk structure, which is
+ * checked elsewhere. This is called from the scheduling-clock interrupt.
+ *
+ * Caller must disable hard irqs.
+ */
+static void rcu_preempt_check_callbacks(void)
+{
+ struct task_struct *t = current;
+
+ if (rcu_preempt_gp_in_progress() &&
+ (!rcu_preempt_running_reader() ||
+ !rcu_cpu_blocking_cur_gp()))
+ rcu_preempt_cpu_qs();
+ if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
+ rcu_preempt_ctrlblk.rcb.donetail)
+ raise_softirq(RCU_SOFTIRQ);
+ if (rcu_preempt_gp_in_progress() &&
+ rcu_cpu_blocking_cur_gp() &&
+ rcu_preempt_running_reader())
+ t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS;
+}
+
+/*
+ * TINY_PREEMPT_RCU has an extra callback-list tail pointer to
+ * update, so this is invoked from __rcu_process_callbacks() to
+ * handle that case. Of course, it is invoked for all flavors of
+ * RCU, but RCU callbacks can appear only on one of the lists, and
+ * neither ->nexttail nor ->donetail can possibly be NULL, so there
+ * is no need for an explicit check.
+ */
+static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
+{
+ if (rcu_preempt_ctrlblk.nexttail == rcp->donetail)
+ rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist;
+}
+
+/*
+ * Process callbacks for preemptible RCU.
+ */
+static void rcu_preempt_process_callbacks(void)
+{
+ __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
+}
+
+/*
+ * Queue a preemptible-RCU callback for invocation after a grace period.
+ */
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+ unsigned long flags;
+
+ debug_rcu_head_queue(head);
+ head->func = func;
+ head->next = NULL;
+
+ local_irq_save(flags);
+ *rcu_preempt_ctrlblk.nexttail = head;
+ rcu_preempt_ctrlblk.nexttail = &head->next;
+ rcu_preempt_start_gp(); /* checks to see if GP needed. */
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+
+void rcu_barrier(void)
+{
+ struct rcu_synchronize rcu;
+
+ init_rcu_head_on_stack(&rcu.head);
+ init_completion(&rcu.completion);
+ /* Will wake me after RCU finished. */
+ call_rcu(&rcu.head, wakeme_after_rcu);
+ /* Wait for it. */
+ wait_for_completion(&rcu.completion);
+ destroy_rcu_head_on_stack(&rcu.head);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
+/*
+ * synchronize_rcu - wait until a grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full grace
+ * period has elapsed, in other words after all currently executing RCU
+ * read-side critical sections have completed. RCU read-side critical
+ * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
+ * and may be nested.
+ */
+void synchronize_rcu(void)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ if (!rcu_scheduler_active)
+ return;
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+ WARN_ON_ONCE(rcu_preempt_running_reader());
+ if (!rcu_preempt_blocked_readers_any())
+ return;
+
+ /* Once we get past the fastpath checks, same code as rcu_barrier(). */
+ rcu_barrier();
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu);
+
+static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq);
+static unsigned long sync_rcu_preempt_exp_count;
+static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex);
+
+/*
+ * Return non-zero if there are any tasks in RCU read-side critical
+ * sections blocking the current preemptible-RCU expedited grace period.
+ * If there is no preemptible-RCU expedited grace period currently in
+ * progress, returns zero unconditionally.
+ */
+static int rcu_preempted_readers_exp(void)
+{
+ return rcu_preempt_ctrlblk.exp_tasks != NULL;
+}
+
+/*
+ * Report the exit from RCU read-side critical section for the last task
+ * that queued itself during or before the current expedited preemptible-RCU
+ * grace period.
+ */
+static void rcu_report_exp_done(void)
+{
+ wake_up(&sync_rcu_preempt_exp_wq);
+}
+
+/*
+ * Wait for an rcu-preempt grace period, but expedite it. The basic idea
+ * is to rely on the fact that there is but one CPU, and that it is
+ * illegal for a task to invoke synchronize_rcu_expedited() while in a
+ * preemptible-RCU read-side critical section. Therefore, any such
+ * critical sections must correspond to blocked tasks, which must therefore
+ * be on the ->blkd_tasks list. So just record the current head of the
+ * list in the ->exp_tasks pointer, and wait for all tasks including and
+ * after the task pointed to by ->exp_tasks to drain.
+ */
+void synchronize_rcu_expedited(void)
+{
+ unsigned long flags;
+ struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk;
+ unsigned long snap;
+
+ barrier(); /* ensure prior action seen before grace period. */
+
+ WARN_ON_ONCE(rcu_preempt_running_reader());
+
+ /*
+ * Acquire lock so that there is only one preemptible RCU grace
+ * period in flight. Of course, if someone does the expedited
+ * grace period for us while we are acquiring the lock, just leave.
+ */
+ snap = sync_rcu_preempt_exp_count + 1;
+ mutex_lock(&sync_rcu_preempt_exp_mutex);
+ if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count))
+ goto unlock_mb_ret; /* Others did our work for us. */
+
+ local_irq_save(flags);
+
+ /*
+ * All RCU readers have to already be on blkd_tasks because
+ * we cannot legally be executing in an RCU read-side critical
+ * section.
+ */
+
+ /* Snapshot current head of ->blkd_tasks list. */
+ rpcp->exp_tasks = rpcp->blkd_tasks.next;
+ if (rpcp->exp_tasks == &rpcp->blkd_tasks)
+ rpcp->exp_tasks = NULL;
+ local_irq_restore(flags);
+
+ /* Wait for tail of ->blkd_tasks list to drain. */
+ if (rcu_preempted_readers_exp())
+ wait_event(sync_rcu_preempt_exp_wq,
+ !rcu_preempted_readers_exp());
+
+ /* Clean up and exit. */
+ barrier(); /* ensure expedited GP seen before counter increment. */
+ sync_rcu_preempt_exp_count++;
+unlock_mb_ret:
+ mutex_unlock(&sync_rcu_preempt_exp_mutex);
+ barrier(); /* ensure subsequent action seen after grace period. */
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
+
+/*
+ * Does preemptible RCU need the CPU to stay out of dynticks mode?
+ */
+int rcu_preempt_needs_cpu(void)
+{
+ if (!rcu_preempt_running_reader())
+ rcu_preempt_cpu_qs();
+ return rcu_preempt_ctrlblk.rcb.rcucblist != NULL;
+}
+
+/*
+ * Check for a task exiting while in a preemptible-RCU read-side
+ * critical section, clean up if so. No need to issue warnings,
+ * as debug_check_no_locks_held() already does this if lockdep
+ * is enabled.
+ */
+void exit_rcu(void)
+{
+ struct task_struct *t = current;
+
+ if (t->rcu_read_lock_nesting == 0)
+ return;
+ t->rcu_read_lock_nesting = 1;
+ rcu_read_unlock();
+}
+
+#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */
+
+/*
+ * Because preemptible RCU does not exist, it never has any callbacks
+ * to check.
+ */
+static void rcu_preempt_check_callbacks(void)
+{
+}
+
+/*
+ * Because preemptible RCU does not exist, it never has any callbacks
+ * to remove.
+ */
+static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
+{
+}
+
+/*
+ * Because preemptible RCU does not exist, it never has any callbacks
+ * to process.
+ */
+static void rcu_preempt_process_callbacks(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#include
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 2e2726d..7297102 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -303,6 +303,10 @@ static void rcu_read_delay(struct rcu_random_state *rrsp)
mdelay(longdelay_ms);
if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us)))
udelay(shortdelay_us);
+#ifdef CONFIG_PREEMPT
+ if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000)))
+ preempt_schedule(); /* No QS if preempt_disable() in effect */
+#endif
}
static void rcu_torture_read_unlock(int idx) __releases(RCU)
@@ -536,6 +540,8 @@ static void srcu_read_delay(struct rcu_random_state *rrsp)
delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick);
if (!delay)
schedule_timeout_interruptible(longdelay);
+ else
+ rcu_read_delay(rrsp);
}
static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index d5bc439..42140a8 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -143,6 +143,11 @@ module_param(blimit, int, 0);
module_param(qhimark, int, 0);
module_param(qlowmark, int, 0);
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT;
+module_param(rcu_cpu_stall_suppress, int, 0644);
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
static int rcu_pending(int cpu);
@@ -450,7 +455,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
-int rcu_cpu_stall_panicking __read_mostly;
+int rcu_cpu_stall_suppress __read_mostly;
static void record_gp_stall_check_time(struct rcu_state *rsp)
{
@@ -482,8 +487,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
rcu_print_task_stall(rnp);
raw_spin_unlock_irqrestore(&rnp->lock, flags);
- /* OK, time to rat on our buddy... */
-
+ /*
+ * OK, time to rat on our buddy...
+ * See Documentation/RCU/stallwarn.txt for info on how to debug
+ * RCU CPU stall warnings.
+ */
printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {",
rsp->name);
rcu_for_each_leaf_node(rsp, rnp) {
@@ -512,6 +520,11 @@ static void print_cpu_stall(struct rcu_state *rsp)
unsigned long flags;
struct rcu_node *rnp = rcu_get_root(rsp);
+ /*
+ * OK, time to rat on ourselves...
+ * See Documentation/RCU/stallwarn.txt for info on how to debug
+ * RCU CPU stall warnings.
+ */
printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
trigger_all_cpu_backtrace();
@@ -530,7 +543,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
long delta;
struct rcu_node *rnp;
- if (rcu_cpu_stall_panicking)
+ if (rcu_cpu_stall_suppress)
return;
delta = jiffies - rsp->jiffies_stall;
rnp = rdp->mynode;
@@ -548,10 +561,26 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr)
{
- rcu_cpu_stall_panicking = 1;
+ rcu_cpu_stall_suppress = 1;
return NOTIFY_DONE;
}
+/**
+ * rcu_cpu_stall_reset - prevent further stall warnings in current grace period
+ *
+ * Set the stall-warning timeout way off into the future, thus preventing
+ * any RCU CPU stall-warning messages from appearing in the current set of
+ * RCU grace periods.
+ *
+ * The caller must disable hard irqs.
+ */
+void rcu_cpu_stall_reset(void)
+{
+ rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+ rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+ rcu_preempt_stall_reset();
+}
+
static struct notifier_block rcu_panic_block = {
.notifier_call = rcu_panic,
};
@@ -571,6 +600,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
{
}
+void rcu_cpu_stall_reset(void)
+{
+}
+
static void __init check_cpu_stall_init(void)
{
}
@@ -712,7 +745,7 @@ static void
rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
__releases(rcu_get_root(rsp)->lock)
{
- struct rcu_data *rdp = rsp->rda[smp_processor_id()];
+ struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
struct rcu_node *rnp = rcu_get_root(rsp);
if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) {
@@ -960,7 +993,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
{
int i;
- struct rcu_data *rdp = rsp->rda[smp_processor_id()];
+ struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
if (rdp->nxtlist == NULL)
return; /* irqs disabled, so comparison is stable. */
@@ -984,7 +1017,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
struct rcu_data *rdp;
raw_spin_lock_irqsave(&rsp->onofflock, flags);
- rdp = rsp->rda[smp_processor_id()];
+ rdp = this_cpu_ptr(rsp->rda);
if (rsp->orphan_cbs_list == NULL) {
raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
return;
@@ -1007,7 +1040,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
unsigned long flags;
unsigned long mask;
int need_report = 0;
- struct rcu_data *rdp = rsp->rda[cpu];
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_node *rnp;
/* Exclude any attempts to start a new grace period. */
@@ -1226,7 +1259,8 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
cpu = rnp->grplo;
bit = 1;
for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
- if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu]))
+ if ((rnp->qsmask & bit) != 0 &&
+ f(per_cpu_ptr(rsp->rda, cpu)))
mask |= bit;
}
if (mask != 0) {
@@ -1402,7 +1436,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
* a quiescent state betweentimes.
*/
local_irq_save(flags);
- rdp = rsp->rda[smp_processor_id()];
+ rdp = this_cpu_ptr(rsp->rda);
rcu_process_gp_end(rsp, rdp);
check_for_new_grace_period(rsp, rdp);
@@ -1701,7 +1735,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
{
unsigned long flags;
int i;
- struct rcu_data *rdp = rsp->rda[cpu];
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
@@ -1729,7 +1763,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
{
unsigned long flags;
unsigned long mask;
- struct rcu_data *rdp = rsp->rda[cpu];
+ struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_node *rnp = rcu_get_root(rsp);
/* Set up local state, ensuring consistent view of global state. */
@@ -1865,7 +1899,8 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
/*
* Helper function for rcu_init() that initializes one rcu_state structure.
*/
-static void __init rcu_init_one(struct rcu_state *rsp)
+static void __init rcu_init_one(struct rcu_state *rsp,
+ struct rcu_data __percpu *rda)
{
static char *buf[] = { "rcu_node_level_0",
"rcu_node_level_1",
@@ -1918,37 +1953,23 @@ static void __init rcu_init_one(struct rcu_state *rsp)
}
}
+ rsp->rda = rda;
rnp = rsp->level[NUM_RCU_LVLS - 1];
for_each_possible_cpu(i) {
while (i > rnp->grphi)
rnp++;
- rsp->rda[i]->mynode = rnp;
+ per_cpu_ptr(rsp->rda, i)->mynode = rnp;
rcu_boot_init_percpu_data(i, rsp);
}
}
-/*
- * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used
- * nowhere else! Assigns leaf node pointers into each CPU's rcu_data
- * structure.
- */
-#define RCU_INIT_FLAVOR(rsp, rcu_data) \
-do { \
- int i; \
- \
- for_each_possible_cpu(i) { \
- (rsp)->rda[i] = &per_cpu(rcu_data, i); \
- } \
- rcu_init_one(rsp); \
-} while (0)
-
void __init rcu_init(void)
{
int cpu;
rcu_bootup_announce();
- RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data);
- RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
+ rcu_init_one(&rcu_sched_state, &rcu_sched_data);
+ rcu_init_one(&rcu_bh_state, &rcu_bh_data);
__rcu_init_preempt();
open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 14c040b..7918ba6 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -254,19 +254,23 @@ struct rcu_data {
#define RCU_STALL_DELAY_DELTA 0
#endif
-#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA)
+#define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \
+ RCU_STALL_DELAY_DELTA)
/* for rsp->jiffies_stall */
-#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA)
+#define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30)
/* for rsp->jiffies_stall */
#define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */
/* to take at least one */
/* scheduling clock irq */
/* before ratting on them. */
-#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE
+#define RCU_CPU_STALL_SUPPRESS_INIT 0
+#else
+#define RCU_CPU_STALL_SUPPRESS_INIT 1
+#endif
-#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
-#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
/*
* RCU global state, including node hierarchy. This hierarchy is
@@ -283,7 +287,7 @@ struct rcu_state {
struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */
u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */
u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */
- struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */
+ struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */
/* The following fields are guarded by the root rcu_node's lock. */
@@ -365,6 +369,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
static void rcu_print_detail_task_stall(struct rcu_state *rsp);
static void rcu_print_task_stall(struct rcu_node *rnp);
+static void rcu_preempt_stall_reset(void);
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 0e4f420..71a4147 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -57,7 +57,7 @@ static void __init rcu_bootup_announce_oddness(void)
printk(KERN_INFO
"\tRCU-based detection of stalled CPUs is disabled.\n");
#endif
-#ifndef CONFIG_RCU_CPU_STALL_VERBOSE
+#if defined(CONFIG_TREE_PREEMPT_RCU) && !defined(CONFIG_RCU_CPU_STALL_VERBOSE)
printk(KERN_INFO "\tVerbose stalled-CPUs detection is disabled.\n");
#endif
#if NUM_RCU_LVL_4 != 0
@@ -154,7 +154,7 @@ static void rcu_preempt_note_context_switch(int cpu)
(t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
/* Possibly blocking in an RCU read-side critical section. */
- rdp = rcu_preempt_state.rda[cpu];
+ rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
rnp = rdp->mynode;
raw_spin_lock_irqsave(&rnp->lock, flags);
t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
@@ -201,7 +201,7 @@ static void rcu_preempt_note_context_switch(int cpu)
*/
void __rcu_read_lock(void)
{
- ACCESS_ONCE(current->rcu_read_lock_nesting)++;
+ current->rcu_read_lock_nesting++;
barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
@@ -344,7 +344,9 @@ void __rcu_read_unlock(void)
struct task_struct *t = current;
barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
- if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
+ --t->rcu_read_lock_nesting;
+ barrier(); /* decrement before load of ->rcu_read_unlock_special */
+ if (t->rcu_read_lock_nesting == 0 &&
unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
rcu_read_unlock_special(t);
#ifdef CONFIG_PROVE_LOCKING
@@ -417,6 +419,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
}
}
+/*
+ * Suppress preemptible RCU's CPU stall warnings by pushing the
+ * time of the next stall-warning message comfortably far into the
+ * future.
+ */
+static void rcu_preempt_stall_reset(void)
+{
+ rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2;
+}
+
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
/*
@@ -546,9 +558,11 @@ EXPORT_SYMBOL_GPL(call_rcu);
*
* Control will return to the caller some time after a full grace
* period has elapsed, in other words after all currently executing RCU
- * read-side critical sections have completed. RCU read-side critical
- * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
- * and may be nested.
+ * read-side critical sections have completed. Note, however, that
+ * upon return from synchronize_rcu(), the caller might well be executing
+ * concurrently with new RCU read-side critical sections that began while
+ * synchronize_rcu() was waiting. RCU read-side critical sections are
+ * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
*/
void synchronize_rcu(void)
{
@@ -771,7 +785,7 @@ static void rcu_preempt_send_cbs_to_orphanage(void)
*/
static void __init __rcu_init_preempt(void)
{
- RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data);
+ rcu_init_one(&rcu_preempt_state, &rcu_preempt_data);
}
/*
@@ -865,6 +879,14 @@ static void rcu_print_task_stall(struct rcu_node *rnp)
{
}
+/*
+ * Because preemptible RCU does not exist, there is no need to suppress
+ * its CPU stall warnings.
+ */
+static void rcu_preempt_stall_reset(void)
+{
+}
+
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
/*
@@ -919,15 +941,6 @@ static void rcu_preempt_process_callbacks(void)
}
/*
- * In classic RCU, call_rcu() is just call_rcu_sched().
- */
-void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
-{
- call_rcu_sched(head, func);
-}
-EXPORT_SYMBOL_GPL(call_rcu);
-
-/*
* Wait for an rcu-preempt grace period, but make it happen quickly.
* But because preemptable RCU does not exist, map to rcu-sched.
*/
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 36c95b4..458e032 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -262,7 +262,7 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
struct rcu_data *rdp;
for_each_possible_cpu(cpu) {
- rdp = rsp->rda[cpu];
+ rdp = per_cpu_ptr(rsp->rda, cpu);
if (rdp->beenonline)
print_one_rcu_pending(m, rdp);
}
diff --git a/kernel/sched.c b/kernel/sched.c
index 41541d7..b600c0a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9105,6 +9105,7 @@ struct cgroup_subsys cpuacct_subsys = {
};
#endif /* CONFIG_CGROUP_CPUACCT */
+#ifndef CONFIG_RCURING
#ifndef CONFIG_SMP
void synchronize_sched_expedited(void)
@@ -9174,3 +9175,4 @@ void synchronize_sched_expedited(void)
EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
#endif /* #else #ifndef CONFIG_SMP */
+#endif /* CONFIG_RCURING */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3e216e0..0eba561 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -269,6 +269,10 @@ void tick_nohz_stop_sched_tick(int inidle)
cpu = smp_processor_id();
ts = &per_cpu(tick_cpu_sched, cpu);
+ /* Don't enter nohz when cpu is offlining, it is going to die */
+ if (cpu_is_offline(cpu))
+ goto end;
+
/*
* Call to tick_nohz_start_idle stops the last_update_time from being
* updated. Thus, it must not be called in the event we are called from
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 3632ce8..19cccc3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3846,6 +3846,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
rpos = reader->read;
pos += size;
+ if (rpos >= commit)
+ break;
+
event = rb_reader_event(cpu_buffer);
size = rb_event_length(event);
} while (len > size);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ba14a22..9ec59f5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3463,6 +3463,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
char *buf;
+ size_t written;
if (tracing_disabled)
return -EINVAL;
@@ -3484,11 +3485,15 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
} else
buf[cnt] = '\0';
- cnt = mark_printk("%s", buf);
+ written = mark_printk("%s", buf);
kfree(buf);
- *fpos += cnt;
+ *fpos += written;
- return cnt;
+ /* don't tell userspace we wrote more - it might confuse them */
+ if (written > cnt)
+ written = cnt;
+
+ return written;
}
static int tracing_clock_show(struct seq_file *m, void *v)
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 09b4fa6..4c758f1 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -598,88 +598,165 @@ out:
return ret;
}
-static void print_event_fields(struct trace_seq *s, struct list_head *head)
+enum {
+ FORMAT_HEADER = 1,
+ FORMAT_PRINTFMT = 2,
+};
+
+static void *f_next(struct seq_file *m, void *v, loff_t *pos)
{
+ struct ftrace_event_call *call = m->private;
struct ftrace_event_field *field;
+ struct list_head *head;
- list_for_each_entry_reverse(field, head, link) {
- /*
- * Smartly shows the array type(except dynamic array).
- * Normal:
- * field:TYPE VAR
- * If TYPE := TYPE[LEN], it is shown:
- * field:TYPE VAR[LEN]
- */
- const char *array_descriptor = strchr(field->type, '[');
+ (*pos)++;
- if (!strncmp(field->type, "__data_loc", 10))
- array_descriptor = NULL;
+ switch ((unsigned long)v) {
+ case FORMAT_HEADER:
+ head = &ftrace_common_fields;
- if (!array_descriptor) {
- trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
- "\tsize:%u;\tsigned:%d;\n",
- field->type, field->name, field->offset,
- field->size, !!field->is_signed);
- } else {
- trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
- "\tsize:%u;\tsigned:%d;\n",
- (int)(array_descriptor - field->type),
- field->type, field->name,
- array_descriptor, field->offset,
- field->size, !!field->is_signed);
- }
+ if (unlikely(list_empty(head)))
+ return NULL;
+
+ field = list_entry(head->prev, struct ftrace_event_field, link);
+ return field;
+
+ case FORMAT_PRINTFMT:
+ /* all done */
+ return NULL;
+ }
+
+ head = trace_get_fields(call);
+
+ /*
+ * To separate common fields from event fields, the
+ * LSB is set on the first event field. Clear it in case it is set.
+ */
+ v = (void *)((unsigned long)v & ~1L);
+
+ field = v;
+ /*
+ * If this is a common field, and at the end of the list, then
+ * continue with main list.
+ */
+ if (field->link.prev == &ftrace_common_fields) {
+ if (unlikely(list_empty(head)))
+ return NULL;
+ field = list_entry(head->prev, struct ftrace_event_field, link);
+ /* Set the LSB to notify f_show to print an extra newline */
+ field = (struct ftrace_event_field *)
+ ((unsigned long)field | 1);
+ return field;
}
+
+ /* If we are done tell f_show to print the format */
+ if (field->link.prev == head)
+ return (void *)FORMAT_PRINTFMT;
+
+ field = list_entry(field->link.prev, struct ftrace_event_field, link);
+
+ return field;
}
-static ssize_t
-event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
- loff_t *ppos)
+static void *f_start(struct seq_file *m, loff_t *pos)
{
- struct ftrace_event_call *call = filp->private_data;
- struct list_head *head;
- struct trace_seq *s;
- char *buf;
- int r;
+ loff_t l = 0;
+ void *p;
- if (*ppos)
+ /* Start by showing the header */
+ if (!*pos)
+ return (void *)FORMAT_HEADER;
+
+ p = (void *)FORMAT_HEADER;
+ do {
+ p = f_next(m, p, &l);
+ } while (p && l < *pos);
+
+ return p;
+}
+
+static int f_show(struct seq_file *m, void *v)
+{
+ struct ftrace_event_call *call = m->private;
+ struct ftrace_event_field *field;
+ const char *array_descriptor;
+
+ switch ((unsigned long)v) {
+ case FORMAT_HEADER:
+ seq_printf(m, "name: %s\n", call->name);
+ seq_printf(m, "ID: %d\n", call->event.type);
+ seq_printf(m, "format:\n");
return 0;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
+ case FORMAT_PRINTFMT:
+ seq_printf(m, "\nprint fmt: %s\n",
+ call->print_fmt);
+ return 0;
+ }
- trace_seq_init(s);
+ /*
+ * To separate common fields from event fields, the
+ * LSB is set on the first event field. Clear it and
+ * print a newline if it is set.
+ */
+ if ((unsigned long)v & 1) {
+ seq_putc(m, '\n');
+ v = (void *)((unsigned long)v & ~1L);
+ }
- trace_seq_printf(s, "name: %s\n", call->name);
- trace_seq_printf(s, "ID: %d\n", call->event.type);
- trace_seq_printf(s, "format:\n");
+ field = v;
- /* print common fields */
- print_event_fields(s, &ftrace_common_fields);
+ /*
+ * Smartly show the array type (except for dynamic arrays).
+ * Normal:
+ * field:TYPE VAR
+ * If TYPE := TYPE[LEN], it is shown:
+ * field:TYPE VAR[LEN]
+ */
+ array_descriptor = strchr(field->type, '[');
- trace_seq_putc(s, '\n');
+ if (!strncmp(field->type, "__data_loc", 10))
+ array_descriptor = NULL;
- /* print event specific fields */
- head = trace_get_fields(call);
- print_event_fields(s, head);
+ if (!array_descriptor)
+ seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ field->type, field->name, field->offset,
+ field->size, !!field->is_signed);
+ else
+ seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
+ (int)(array_descriptor - field->type),
+ field->type, field->name,
+ array_descriptor, field->offset,
+ field->size, !!field->is_signed);
- r = trace_seq_printf(s, "\nprint fmt: %s\n", call->print_fmt);
+ return 0;
+}
- if (!r) {
- /*
- * ug! The format output is bigger than a PAGE!!
- */
- buf = "FORMAT TOO BIG\n";
- r = simple_read_from_buffer(ubuf, cnt, ppos,
- buf, strlen(buf));
- goto out;
- }
+static void f_stop(struct seq_file *m, void *p)
+{
+}
- r = simple_read_from_buffer(ubuf, cnt, ppos,
- s->buffer, s->len);
- out:
- kfree(s);
- return r;
+static const struct seq_operations trace_format_seq_ops = {
+ .start = f_start,
+ .next = f_next,
+ .stop = f_stop,
+ .show = f_show,
+};
+
+static int trace_format_open(struct inode *inode, struct file *file)
+{
+ struct ftrace_event_call *call = inode->i_private;
+ struct seq_file *m;
+ int ret;
+
+ ret = seq_open(file, &trace_format_seq_ops);
+ if (ret < 0)
+ return ret;
+
+ m = file->private_data;
+ m->private = call;
+
+ return 0;
}
static ssize_t
@@ -877,8 +954,10 @@ static const struct file_operations ftrace_enable_fops = {
};
static const struct file_operations ftrace_event_format_fops = {
- .open = tracing_open_generic,
- .read = event_format_read,
+ .open = trace_format_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
};
static const struct file_operations ftrace_event_id_fops = {
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 6bff236..6f23369 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -507,7 +507,15 @@ get_return_for_leaf(struct trace_iterator *iter,
* if the output fails.
*/
data->ent = *curr;
- data->ret = *next;
+ /*
+ * If the next event is not a return type, then
+ * we only care about what type it is. Otherwise we can
+ * safely copy the entire event.
+ */
+ if (next->ent.type == TRACE_GRAPH_RET)
+ data->ret = *next;
+ else
+ data->ret.ent.type = next->ent.type;
}
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2994a0e..8bd600c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -35,6 +35,9 @@
#include
#include
+#define CREATE_TRACE_POINTS
+#include
+
#include "workqueue_sched.h"
enum {
@@ -1790,7 +1793,13 @@ static void process_one_work(struct worker *worker, struct work_struct *work)
work_clear_pending(work);
lock_map_acquire(&cwq->wq->lockdep_map);
lock_map_acquire(&lockdep_map);
+ trace_workqueue_execute_start(work);
f(work);
+ /*
+ * While we must be careful to not use "work" after this, the trace
+ * point will only record its address.
+ */
+ trace_workqueue_execute_end(work);
lock_map_release(&lockdep_map);
lock_map_release(&cwq->wq->lockdep_map);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 9e06b7f..52c2172 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -539,6 +539,19 @@ config PROVE_RCU_REPEATEDLY
disabling, allowing multiple RCU-lockdep warnings to be printed
on a single reboot.
+config SPARSE_RCU_POINTER
+ bool "RCU debugging: sparse-based checks for pointer usage"
+ default n
+ help
+ This feature enables the __rcu sparse annotation for
+ RCU-protected pointers. This annotation will cause sparse
+ to flag any non-RCU use of annotated pointers. This can be
+ helpful when debugging RCU usage. Please note that this feature
+ is not intended to enforce code cleanliness; it is instead merely
+ a debugging aid.
+
+ Say Y to make sparse flag questionable use of RCU-protected pointers.
+
Say N if you are unsure.
config LOCKDEP
@@ -832,6 +845,30 @@ config RCU_CPU_STALL_DETECTOR
Say Y if you are unsure.
+config RCU_CPU_STALL_TIMEOUT
+ int "RCU CPU stall timeout in seconds"
+ depends on RCU_CPU_STALL_DETECTOR
+ range 3 300
+ default 60
+ help
+ If a given RCU grace period extends more than the specified
+ number of seconds, a CPU stall warning is printed. If the
+ RCU grace period persists, additional CPU stall warnings are
+ printed at more widely spaced intervals.
+
+config RCU_CPU_STALL_DETECTOR_RUNNABLE
+ bool "RCU CPU stall checking starts automatically at boot"
+ depends on RCU_CPU_STALL_DETECTOR
+ default y
+ help
+ If set, start checking for RCU CPU stalls immediately on
+ boot. Otherwise, RCU CPU stall checking must be manually
+ enabled.
+
+ Say Y if you are unsure.
+
+ Say N if you wish to suppress RCU CPU stall checking during boot.
+
config RCU_CPU_STALL_VERBOSE
bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR"
depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU
@@ -994,13 +1031,16 @@ config FAULT_INJECTION_STACKTRACE_FILTER
config LATENCYTOP
bool "Latency measuring infrastructure"
+ depends on HAVE_LATENCYTOP_SUPPORT
+ depends on DEBUG_KERNEL
+ depends on STACKTRACE_SUPPORT
+ depends on PROC_FS
select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE
select KALLSYMS
select KALLSYMS_ALL
select STACKTRACE
select SCHEDSTATS
select SCHED_DEBUG
- depends on HAVE_LATENCYTOP_SUPPORT
help
Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations.
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index e907858..0ccbcdf 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -49,7 +49,7 @@ struct radix_tree_node {
unsigned int height; /* Height from the bottom */
unsigned int count;
struct rcu_head rcu_head;
- void *slots[RADIX_TREE_MAP_SIZE];
+ void __rcu *slots[RADIX_TREE_MAP_SIZE];
unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
};
@@ -625,6 +625,8 @@ EXPORT_SYMBOL(radix_tree_tag_get);
*
* The function returns number of leaves where the tag was set and sets
* *first_indexp to the first unscanned index.
+ * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
+ * be prepared to handle that.
*/
unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
unsigned long *first_indexp, unsigned long last_index,
@@ -675,7 +677,8 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
next:
/* Go to next item at level determined by 'shift' */
index = ((index >> shift) + 1) << shift;
- if (index > last_index)
+ /* Overflow can happen when last_index is ~0UL... */
+ if (index > last_index || !index)
break;
if (tagged >= nr_to_tag)
break;
diff --git a/mm/memory.c b/mm/memory.c
index b6e5fd2..2ed2267 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2770,11 +2770,18 @@ static inline int check_stack_guard_page(struct vm_area_struct *vma, unsigned lo
{
address &= PAGE_MASK;
if ((vma->vm_flags & VM_GROWSDOWN) && address == vma->vm_start) {
- address -= PAGE_SIZE;
- if (find_vma(vma->vm_mm, address) != vma)
- return -ENOMEM;
+ struct vm_area_struct *prev = vma->vm_prev;
+
+ /*
+ * Is there a mapping abutting this one below?
+ *
+ * That's only ok if it's the same stack mapping
+ * that has gotten split..
+ */
+ if (prev && prev->vm_end == address)
+ return prev->vm_flags & VM_GROWSDOWN ? 0 : -ENOMEM;
- expand_stack(vma, address);
+ expand_stack(vma, address - PAGE_SIZE);
}
return 0;
}
diff --git a/mm/mlock.c b/mm/mlock.c
index 49e5e4c..cbae7c5 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -135,6 +135,19 @@ void munlock_vma_page(struct page *page)
}
}
+/* Is the vma a continuation of the stack vma above it? */
+static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr)
+{
+ return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN);
+}
+
+static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr)
+{
+ return (vma->vm_flags & VM_GROWSDOWN) &&
+ (vma->vm_start == addr) &&
+ !vma_stack_continue(vma->vm_prev, addr);
+}
+
/**
* __mlock_vma_pages_range() - mlock a range of pages in the vma.
* @vma: target vma
@@ -168,11 +181,9 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
gup_flags |= FOLL_WRITE;
/* We don't try to access the guard page of a stack vma */
- if (vma->vm_flags & VM_GROWSDOWN) {
- if (start == vma->vm_start) {
- start += PAGE_SIZE;
- nr_pages--;
- }
+ if (stack_guard_page(vma, start)) {
+ addr += PAGE_SIZE;
+ nr_pages--;
}
while (nr_pages > 0) {
diff --git a/mm/mmap.c b/mm/mmap.c
index 3100333..331e51a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -388,17 +388,23 @@ static inline void
__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev, struct rb_node *rb_parent)
{
+ struct vm_area_struct *next;
+
+ vma->vm_prev = prev;
if (prev) {
- vma->vm_next = prev->vm_next;
+ next = prev->vm_next;
prev->vm_next = vma;
} else {
mm->mmap = vma;
if (rb_parent)
- vma->vm_next = rb_entry(rb_parent,
+ next = rb_entry(rb_parent,
struct vm_area_struct, vm_rb);
else
- vma->vm_next = NULL;
+ next = NULL;
}
+ vma->vm_next = next;
+ if (next)
+ next->vm_prev = vma;
}
void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -483,7 +489,11 @@ static inline void
__vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *prev)
{
- prev->vm_next = vma->vm_next;
+ struct vm_area_struct *next = vma->vm_next;
+
+ prev->vm_next = next;
+ if (next)
+ next->vm_prev = prev;
rb_erase(&vma->vm_rb, &mm->mm_rb);
if (mm->mmap_cache == vma)
mm->mmap_cache = prev;
@@ -1915,6 +1925,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr;
insertion_point = (prev ? &prev->vm_next : &mm->mmap);
+ vma->vm_prev = NULL;
do {
rb_erase(&vma->vm_rb, &mm->mm_rb);
mm->map_count--;
@@ -1922,6 +1933,8 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
vma = vma->vm_next;
} while (vma && vma->vm_start < end);
*insertion_point = vma;
+ if (vma)
+ vma->vm_prev = prev;
tail_vma->vm_next = NULL;
if (mm->unmap_area == arch_unmap_area)
addr = prev ? prev->vm_end : mm->mmap_base;
diff --git a/mm/nommu.c b/mm/nommu.c
index efa9a38..88ff091 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -604,7 +604,7 @@ static void protect_vma(struct vm_area_struct *vma, unsigned long flags)
*/
static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
{
- struct vm_area_struct *pvma, **pp;
+ struct vm_area_struct *pvma, **pp, *next;
struct address_space *mapping;
struct rb_node **p, *parent;
@@ -664,8 +664,11 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
break;
}
- vma->vm_next = *pp;
+ next = *pp;
*pp = vma;
+ vma->vm_next = next;
+ if (next)
+ next->vm_prev = vma;
}
/*
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 5014e50..fc81cb2 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -372,7 +372,7 @@ static void dump_tasks(const struct mem_cgroup *mem)
}
pr_info("[%5d] %5d %5d %8lu %8lu %3u %3d %5d %s\n",
- task->pid, __task_cred(task)->uid, task->tgid,
+ task->pid, task_uid(task), task->tgid,
task->mm->total_vm, get_mm_rss(task->mm),
task_cpu(task), task->signal->oom_adj,
task->signal->oom_score_adj, task->comm);
@@ -401,10 +401,9 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem)
{
p = find_lock_task_mm(p);
- if (!p) {
- task_unlock(p);
+ if (!p)
return 1;
- }
+
pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
task_pid_nr(p), p->comm, K(p->mm->total_vm),
K(get_mm_counter(p->mm, MM_ANONPAGES)),
@@ -647,6 +646,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
unsigned long freed = 0;
unsigned int points;
enum oom_constraint constraint = CONSTRAINT_NONE;
+ int killed = 0;
blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
if (freed > 0)
@@ -684,7 +684,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
if (!oom_kill_process(current, gfp_mask, order, 0, totalpages,
NULL, nodemask,
"Out of memory (oom_kill_allocating_task)"))
- return;
+ goto out;
}
retry:
@@ -692,7 +692,7 @@ retry:
constraint == CONSTRAINT_MEMORY_POLICY ? nodemask :
NULL);
if (PTR_ERR(p) == -1UL)
- return;
+ goto out;
/* Found nothing?!?! Either we hang forever, or we panic. */
if (!p) {
@@ -704,13 +704,15 @@ retry:
if (oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
nodemask, "Out of memory"))
goto retry;
+ killed = 1;
+out:
read_unlock(&tasklist_lock);
/*
* Give "p" a good chance of killing itself before we
* retry to allocate memory unless "p" is current
*/
- if (!test_thread_flag(TIF_MEMDIE))
+ if (killed && !test_thread_flag(TIF_MEMDIE))
schedule_timeout_uninterruptible(1);
}
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7262aac..c09ef52 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -836,7 +836,8 @@ void tag_pages_for_writeback(struct address_space *mapping,
spin_unlock_irq(&mapping->tree_lock);
WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH);
cond_resched();
- } while (tagged >= WRITEBACK_TAG_BATCH);
+ /* We check 'start' to handle wrapping when end == ~0UL */
+ } while (tagged >= WRITEBACK_TAG_BATCH && start);
}
EXPORT_SYMBOL(tag_pages_for_writeback);
diff --git a/mm/shmem.c b/mm/shmem.c
index dfaa0f4..080b09a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2325,7 +2325,10 @@ static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs)
static void shmem_put_super(struct super_block *sb)
{
- kfree(sb->s_fs_info);
+ struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+
+ percpu_counter_destroy(&sbinfo->used_blocks);
+ kfree(sbinfo);
sb->s_fs_info = NULL;
}
@@ -2367,7 +2370,8 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent)
#endif
spin_lock_init(&sbinfo->stat_lock);
- percpu_counter_init(&sbinfo->used_blocks, 0);
+ if (percpu_counter_init(&sbinfo->used_blocks, 0))
+ goto failed;
sbinfo->free_inodes = sbinfo->max_inodes;
sb->s_maxbytes = SHMEM_MAX_BYTES;
diff --git a/net/core/dev.c b/net/core/dev.c
index 1ae6543..3721fbb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3143,7 +3143,7 @@ pull:
put_page(skb_shinfo(skb)->frags[0].page);
memmove(skb_shinfo(skb)->frags,
skb_shinfo(skb)->frags + 1,
- --skb_shinfo(skb)->nr_frags);
+ --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
}
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 6bccba3..51d6c31 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -735,6 +735,7 @@ static void get_counters(const struct xt_table_info *t,
if (cpu == curcpu)
continue;
i = 0;
+ local_bh_disable();
xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) {
ADD_COUNTER(counters[i], iter->counters.bcnt,
@@ -742,6 +743,7 @@ static void get_counters(const struct xt_table_info *t,
++i;
}
xt_info_wrunlock(cpu);
+ local_bh_enable();
}
put_cpu();
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index c439721..97b64b2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -909,6 +909,7 @@ get_counters(const struct xt_table_info *t,
if (cpu == curcpu)
continue;
i = 0;
+ local_bh_disable();
xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) {
ADD_COUNTER(counters[i], iter->counters.bcnt,
@@ -916,6 +917,7 @@ get_counters(const struct xt_table_info *t,
++i; /* macro does multi eval of i */
}
xt_info_wrunlock(cpu);
+ local_bh_enable();
}
put_cpu();
}
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 8c8632d..957c924 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -38,7 +38,7 @@ static DEFINE_SPINLOCK(nf_nat_lock);
static struct nf_conntrack_l3proto *l3proto __read_mostly;
#define MAX_IP_NAT_PROTO 256
-static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
+static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO]
__read_mostly;
static inline const struct nf_nat_protocol *
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 5359ef4..29a7bca 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -922,6 +922,7 @@ get_counters(const struct xt_table_info *t,
if (cpu == curcpu)
continue;
i = 0;
+ local_bh_disable();
xt_info_wrlock(cpu);
xt_entry_foreach(iter, t->entries[cpu], t->size) {
ADD_COUNTER(counters[i], iter->counters.bcnt,
@@ -929,6 +930,7 @@ get_counters(const struct xt_table_info *t,
++i;
}
xt_info_wrunlock(cpu);
+ local_bh_enable();
}
put_cpu();
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 8f2d040..d126365 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2580,7 +2580,7 @@ ctl_table ipv6_route_table_template[] = {
.data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
+ .proc_handler = proc_dointvec,
},
{
.procname = "mtu_expires",
@@ -2594,7 +2594,7 @@ ctl_table ipv6_route_table_template[] = {
.data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
+ .proc_handler = proc_dointvec,
},
{
.procname = "gc_min_interval_ms",
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 78b505d..fdaec7d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -27,7 +27,7 @@
static DEFINE_MUTEX(afinfo_mutex);
-const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
+const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
EXPORT_SYMBOL(nf_afinfo);
int nf_register_afinfo(const struct nf_afinfo *afinfo)
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index cdcc764..5702de3 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -26,10 +26,10 @@
static DEFINE_MUTEX(nf_ct_ecache_mutex);
-struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly;
+struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
-struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly;
+struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly;
EXPORT_SYMBOL_GPL(nf_expect_event_cb);
/* deliver cached events and clear cache entry - must be called with locally
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 7dcf7a4..1d9bdae 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -16,7 +16,7 @@
#include
#include
-static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM];
+static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM];
static DEFINE_MUTEX(nf_ct_ext_type_mutex);
void __nf_ct_ext_destroy(struct nf_conn *ct)
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 5886ba1..ed6d929 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -28,8 +28,8 @@
#include
#include
-static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly;
-struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly;
+static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly;
+struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_l3protos);
static DEFINE_MUTEX(nf_ct_proto_mutex);
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 7df37fd..b07393e 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -16,7 +16,7 @@
#define NF_LOG_PREFIXLEN 128
#define NFLOGGER_NAME_LEN 64
-static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
+static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly;
static DEFINE_MUTEX(nf_log_mutex);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 78b3cf9..74aebed 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -18,7 +18,7 @@
* long term mutex. The handler must provide an an outfn() to accept packets
* for queueing and must reinject all packets it receives, no matter what.
*/
-static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
+static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly;
static DEFINE_MUTEX(queue_handler_mutex);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2cbf380..8648a99 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1406,7 +1406,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
struct netlink_sock *nlk = nlk_sk(sk);
int noblock = flags&MSG_DONTWAIT;
size_t copied;
- struct sk_buff *skb;
+ struct sk_buff *skb, *frag __maybe_unused = NULL;
int err;
if (flags&MSG_OOB)
@@ -1441,21 +1441,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
kfree_skb(skb);
skb = compskb;
} else {
- /*
- * Before setting frag_list to NULL, we must get a
- * private copy of skb if shared (because of MSG_PEEK)
- */
- if (skb_shared(skb)) {
- struct sk_buff *nskb;
-
- nskb = pskb_copy(skb, GFP_KERNEL);
- kfree_skb(skb);
- skb = nskb;
- err = -ENOMEM;
- if (!skb)
- goto out;
- }
- kfree_skb(skb_shinfo(skb)->frag_list);
+ frag = skb_shinfo(skb)->frag_list;
skb_shinfo(skb)->frag_list = NULL;
}
}
@@ -1492,6 +1478,10 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
if (flags & MSG_TRUNC)
copied = skb->len;
+#ifdef CONFIG_COMPAT_NETLINK_MESSAGES
+ skb_shinfo(skb)->frag_list = frag;
+#endif
+
skb_free_datagram(sk, skb);
if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 8406c66..c2ed90a 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -152,21 +152,24 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result
static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
unsigned char *b = skb_tail_pointer(skb);
- struct tc_gact opt;
struct tcf_gact *gact = a->priv;
+ struct tc_gact opt = {
+ .index = gact->tcf_index,
+ .refcnt = gact->tcf_refcnt - ref,
+ .bindcnt = gact->tcf_bindcnt - bind,
+ .action = gact->tcf_action,
+ };
struct tcf_t t;
- opt.index = gact->tcf_index;
- opt.refcnt = gact->tcf_refcnt - ref;
- opt.bindcnt = gact->tcf_bindcnt - bind;
- opt.action = gact->tcf_action;
NLA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt);
#ifdef CONFIG_GACT_PROB
if (gact->tcfg_ptype) {
- struct tc_gact_p p_opt;
- p_opt.paction = gact->tcfg_paction;
- p_opt.pval = gact->tcfg_pval;
- p_opt.ptype = gact->tcfg_ptype;
+ struct tc_gact_p p_opt = {
+ .paction = gact->tcfg_paction,
+ .pval = gact->tcfg_pval,
+ .ptype = gact->tcfg_ptype,
+ };
+
NLA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt);
}
#endif
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 11f195a..0c311be 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -219,15 +219,16 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_mirred *m = a->priv;
- struct tc_mirred opt;
+ struct tc_mirred opt = {
+ .index = m->tcf_index,
+ .action = m->tcf_action,
+ .refcnt = m->tcf_refcnt - ref,
+ .bindcnt = m->tcf_bindcnt - bind,
+ .eaction = m->tcfm_eaction,
+ .ifindex = m->tcfm_ifindex,
+ };
struct tcf_t t;
- opt.index = m->tcf_index;
- opt.action = m->tcf_action;
- opt.refcnt = m->tcf_refcnt - ref;
- opt.bindcnt = m->tcf_bindcnt - bind;
- opt.eaction = m->tcfm_eaction;
- opt.ifindex = m->tcfm_ifindex;
NLA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt);
t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 509a2d5..186eb83 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -272,19 +272,19 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_nat *p = a->priv;
- struct tc_nat opt;
+ struct tc_nat opt = {
+ .old_addr = p->old_addr,
+ .new_addr = p->new_addr,
+ .mask = p->mask,
+ .flags = p->flags,
+
+ .index = p->tcf_index,
+ .action = p->tcf_action,
+ .refcnt = p->tcf_refcnt - ref,
+ .bindcnt = p->tcf_bindcnt - bind,
+ };
struct tcf_t t;
- opt.old_addr = p->old_addr;
- opt.new_addr = p->new_addr;
- opt.mask = p->mask;
- opt.flags = p->flags;
-
- opt.index = p->tcf_index;
- opt.action = p->tcf_action;
- opt.refcnt = p->tcf_refcnt - ref;
- opt.bindcnt = p->tcf_bindcnt - bind;
-
NLA_PUT(skb, TCA_NAT_PARMS, sizeof(opt), &opt);
t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 4a1d640..97e84f3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -164,13 +164,14 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_defact *d = a->priv;
- struct tc_defact opt;
+ struct tc_defact opt = {
+ .index = d->tcf_index,
+ .refcnt = d->tcf_refcnt - ref,
+ .bindcnt = d->tcf_bindcnt - bind,
+ .action = d->tcf_action,
+ };
struct tcf_t t;
- opt.index = d->tcf_index;
- opt.refcnt = d->tcf_refcnt - ref;
- opt.bindcnt = d->tcf_bindcnt - bind;
- opt.action = d->tcf_action;
NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata);
t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index e9607fe..66cbf4e 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -159,13 +159,14 @@ static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_skbedit *d = a->priv;
- struct tc_skbedit opt;
+ struct tc_skbedit opt = {
+ .index = d->tcf_index,
+ .refcnt = d->tcf_refcnt - ref,
+ .bindcnt = d->tcf_bindcnt - bind,
+ .action = d->tcf_action,
+ };
struct tcf_t t;
- opt.index = d->tcf_index;
- opt.refcnt = d->tcf_refcnt - ref;
- opt.bindcnt = d->tcf_bindcnt - bind;
- opt.action = d->tcf_action;
NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt);
if (d->flags & SKBEDIT_F_PRIORITY)
NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 443c161..3376d76 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -18,10 +18,11 @@ config SUNRPC_XPRT_RDMA
If unsure, say N.
config RPCSEC_GSS_KRB5
- tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
- depends on SUNRPC && EXPERIMENTAL
+ tristate
+ depends on SUNRPC && CRYPTO
+ prompt "Secure RPC: Kerberos V mechanism" if !(NFS_V4 || NFSD_V4)
+ default y
select SUNRPC_GSS
- select CRYPTO
select CRYPTO_MD5
select CRYPTO_DES
select CRYPTO_CBC
@@ -34,7 +35,7 @@ config RPCSEC_GSS_KRB5
available from http://linux-nfs.org/. In addition, user-space
Kerberos support should be installed.
- If unsure, say N.
+ If unsure, say Y.
config RPCSEC_GSS_SPKM3
tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)"
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e5e28d1..2ac3f6e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -249,6 +249,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
req->rl_nchunks = nchunks;
BUG_ON(nchunks == 0);
+ BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
+ && (nchunks > 3));
/*
* finish off header. If write, marshal discrim and nchunks.
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 27015c6..5f4c7b3 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -650,10 +650,22 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
- /* Add room for frmr register and invalidate WRs */
- ep->rep_attr.cap.max_send_wr *= 3;
- if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
- return -EINVAL;
+ /* Add room for frmr register and invalidate WRs.
+ * 1. FRMR reg WR for head
+ * 2. FRMR invalidate WR for head
+ * 3. FRMR reg WR for pagelist
+ * 4. FRMR invalidate WR for pagelist
+ * 5. FRMR reg WR for tail
+ * 6. FRMR invalidate WR for tail
+ * 7. The RDMA_SEND WR
+ */
+ ep->rep_attr.cap.max_send_wr *= 7;
+ if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
+ cdata->max_requests = devattr.max_qp_wr / 7;
+ if (!cdata->max_requests)
+ return -EINVAL;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
+ }
break;
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:
@@ -1490,7 +1502,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
memset(&frmr_wr, 0, sizeof frmr_wr);
frmr_wr.opcode = IB_WR_FAST_REG_MR;
frmr_wr.send_flags = 0; /* unsignaled */
- frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma;
+ frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
frmr_wr.wr.fast_reg.page_list_len = i;
frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 49a62f0..b6309db 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1305,10 +1305,11 @@ static void xs_tcp_state_change(struct sock *sk)
if (!(xprt = xprt_from_sock(sk)))
goto out;
dprintk("RPC: xs_tcp_state_change client %p...\n", xprt);
- dprintk("RPC: state %x conn %d dead %d zapped %d\n",
+ dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n",
sk->sk_state, xprt_connected(xprt),
sock_flag(sk, SOCK_DEAD),
- sock_flag(sk, SOCK_ZAPPED));
+ sock_flag(sk, SOCK_ZAPPED),
+ sk->sk_shutdown);
switch (sk->sk_state) {
case TCP_ESTABLISHED:
@@ -1779,10 +1780,25 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra
{
unsigned int state = transport->inet->sk_state;
- if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED)
- return;
- if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
- return;
+ if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) {
+ /* we don't need to abort the connection if the socket
+ * hasn't undergone a shutdown
+ */
+ if (transport->inet->sk_shutdown == 0)
+ return;
+ dprintk("RPC: %s: TCP_CLOSEd and sk_shutdown set to %d\n",
+ __func__, transport->inet->sk_shutdown);
+ }
+ if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) {
+ /* we don't need to abort the connection if the socket
+ * hasn't undergone a shutdown
+ */
+ if (transport->inet->sk_shutdown == 0)
+ return;
+ dprintk("RPC: %s: ESTABLISHED/SYN_SENT "
+ "sk_shutdown set to %d\n",
+ __func__, transport->inet->sk_shutdown);
+ }
xs_abort_connection(xprt, transport);
}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index ba59983..b14ed4b 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -2504,7 +2504,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
if (p->dir > XFRM_POLICY_OUT)
return NULL;
- xp = xfrm_policy_alloc(net, GFP_KERNEL);
+ xp = xfrm_policy_alloc(net, GFP_ATOMIC);
if (xp == NULL) {
*dir = -ENOBUFS;
return NULL;
diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c
index 642eef3..178061e 100644
--- a/samples/kfifo/bytestream-example.c
+++ b/samples/kfifo/bytestream-example.c
@@ -44,10 +44,17 @@ static struct kfifo test;
static DECLARE_KFIFO(test, unsigned char, FIFO_SIZE);
#endif
+static const unsigned char expected_result[FIFO_SIZE] = {
+ 3, 4, 5, 6, 7, 8, 9, 0,
+ 1, 20, 21, 22, 23, 24, 25, 26,
+ 27, 28, 29, 30, 31, 32, 33, 34,
+ 35, 36, 37, 38, 39, 40, 41, 42,
+};
+
static int __init testfunc(void)
{
unsigned char buf[6];
- unsigned char i;
+ unsigned char i, j;
unsigned int ret;
printk(KERN_INFO "byte stream fifo test start\n");
@@ -73,16 +80,34 @@ static int __init testfunc(void)
ret = kfifo_in(&test, buf, ret);
printk(KERN_INFO "ret: %d\n", ret);
+ /* skip first element of the fifo */
+ printk(KERN_INFO "skip 1st element\n");
+ kfifo_skip(&test);
+
/* put values into the fifo until is full */
for (i = 20; kfifo_put(&test, &i); i++)
;
printk(KERN_INFO "queue len: %u\n", kfifo_len(&test));
- /* print out all values in the fifo */
- while (kfifo_get(&test, &i))
- printk("%d ", i);
- printk("\n");
+ /* show the first value without removing from the fifo */
+ if (kfifo_peek(&test, &i))
+ printk(KERN_INFO "%d\n", i);
+
+ /* check the correctness of all values in the fifo */
+ j = 0;
+ while (kfifo_get(&test, &i)) {
+ printk(KERN_INFO "item = %d\n", i);
+ if (i != expected_result[j++]) {
+ printk(KERN_WARNING "value mismatch: test failed\n");
+ return -EIO;
+ }
+ }
+ if (j != ARRAY_SIZE(expected_result)) {
+ printk(KERN_WARNING "size mismatch: test failed\n");
+ return -EIO;
+ }
+ printk(KERN_INFO "test passed\n");
return 0;
}
@@ -138,7 +163,12 @@ static int __init example_init(void)
#else
INIT_KFIFO(test);
#endif
- testfunc();
+ if (testfunc() < 0) {
+#ifdef DYNAMIC
+ kfifo_free(&test);
+#endif
+ return -EIO;
+ }
if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) {
#ifdef DYNAMIC
diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c
index b9482c2..ee03a4f 100644
--- a/samples/kfifo/dma-example.c
+++ b/samples/kfifo/dma-example.c
@@ -29,8 +29,8 @@ static int __init example_init(void)
printk(KERN_INFO "DMA fifo test start\n");
if (kfifo_alloc(&fifo, FIFO_SIZE, GFP_KERNEL)) {
- printk(KERN_ERR "error kfifo_alloc\n");
- return 1;
+ printk(KERN_WARNING "error kfifo_alloc\n");
+ return -ENOMEM;
}
printk(KERN_INFO "queue size: %u\n", kfifo_size(&fifo));
@@ -41,72 +41,99 @@ static int __init example_init(void)
kfifo_put(&fifo, &i);
/* kick away first byte */
- ret = kfifo_get(&fifo, &i);
+ kfifo_skip(&fifo);
printk(KERN_INFO "queue len: %u\n", kfifo_len(&fifo));
+ /*
+ * Configure the kfifo buffer to receive data from DMA input.
+ *
+ * .--------------------------------------.
+ * | 0 | 1 | 2 | ... | 12 | 13 | ... | 31 |
+ * |---|------------------|---------------|
+ * \_/ \________________/ \_____________/
+ * \ \ \
+ * \ \_allocated data \
+ * \_*free space* \_*free space*
+ *
+ * We need two different SG entries: one for the free space area at the
+ * end of the kfifo buffer (19 bytes) and another for the first free
+ * byte at the beginning, after the kfifo_skip().
+ */
+ sg_init_table(sg, ARRAY_SIZE(sg));
ret = kfifo_dma_in_prepare(&fifo, sg, ARRAY_SIZE(sg), FIFO_SIZE);
printk(KERN_INFO "DMA sgl entries: %d\n", ret);
+ if (!ret) {
+ /* fifo is full and no sgl was created */
+ printk(KERN_WARNING "error kfifo_dma_in_prepare\n");
+ return -EIO;
+ }
- /* if 0 was returned, fifo is full and no sgl was created */
- if (ret) {
- printk(KERN_INFO "scatterlist for receive:\n");
- for (i = 0; i < ARRAY_SIZE(sg); i++) {
- printk(KERN_INFO
- "sg[%d] -> "
- "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
- i, sg[i].page_link, sg[i].offset, sg[i].length);
+ /* receive data */
+ printk(KERN_INFO "scatterlist for receive:\n");
+ for (i = 0; i < ARRAY_SIZE(sg); i++) {
+ printk(KERN_INFO
+ "sg[%d] -> "
+ "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
+ i, sg[i].page_link, sg[i].offset, sg[i].length);
- if (sg_is_last(&sg[i]))
- break;
- }
+ if (sg_is_last(&sg[i]))
+ break;
+ }
- /* but here your code to setup and exectute the dma operation */
- /* ... */
+ /* put here your code to set up and execute the dma operation */
+ /* ... */
- /* example: zero bytes received */
- ret = 0;
+ /* example: zero bytes received */
+ ret = 0;
- /* finish the dma operation and update the received data */
- kfifo_dma_in_finish(&fifo, ret);
- }
+ /* finish the dma operation and update the received data */
+ kfifo_dma_in_finish(&fifo, ret);
+ /* Prepare to transmit data, example: 8 bytes */
ret = kfifo_dma_out_prepare(&fifo, sg, ARRAY_SIZE(sg), 8);
printk(KERN_INFO "DMA sgl entries: %d\n", ret);
+ if (!ret) {
+ /* no data was available and no sgl was created */
+ printk(KERN_WARNING "error kfifo_dma_out_prepare\n");
+ return -EIO;
+ }
- /* if 0 was returned, no data was available and no sgl was created */
- if (ret) {
- printk(KERN_INFO "scatterlist for transmit:\n");
- for (i = 0; i < ARRAY_SIZE(sg); i++) {
- printk(KERN_INFO
- "sg[%d] -> "
- "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
- i, sg[i].page_link, sg[i].offset, sg[i].length);
+ printk(KERN_INFO "scatterlist for transmit:\n");
+ for (i = 0; i < ARRAY_SIZE(sg); i++) {
+ printk(KERN_INFO
+ "sg[%d] -> "
+ "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n",
+ i, sg[i].page_link, sg[i].offset, sg[i].length);
- if (sg_is_last(&sg[i]))
- break;
- }
+ if (sg_is_last(&sg[i]))
+ break;
+ }
- /* but here your code to setup and exectute the dma operation */
- /* ... */
+ /* put here your code to set up and execute the dma operation */
+ /* ... */
- /* example: 5 bytes transmitted */
- ret = 5;
+ /* example: 5 bytes transmitted */
+ ret = 5;
- /* finish the dma operation and update the transmitted data */
- kfifo_dma_out_finish(&fifo, ret);
- }
+ /* finish the dma operation and update the transmitted data */
+ kfifo_dma_out_finish(&fifo, ret);
+ ret = kfifo_len(&fifo);
printk(KERN_INFO "queue len: %u\n", kfifo_len(&fifo));
+ if (ret != 7) {
+ printk(KERN_WARNING "size mismatch: test failed");
+ return -EIO;
+ }
+ printk(KERN_INFO "test passed\n");
+
return 0;
}
static void __exit example_exit(void)
{
-#ifdef DYNAMIC
- kfifo_free(&test);
-#endif
+ kfifo_free(&fifo);
}
module_init(example_init);
diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c
index d6c5b7d..71b2aab 100644
--- a/samples/kfifo/inttype-example.c
+++ b/samples/kfifo/inttype-example.c
@@ -44,10 +44,17 @@ static DECLARE_KFIFO_PTR(test, int);
static DEFINE_KFIFO(test, int, FIFO_SIZE);
#endif
+static const int expected_result[FIFO_SIZE] = {
+ 3, 4, 5, 6, 7, 8, 9, 0,
+ 1, 20, 21, 22, 23, 24, 25, 26,
+ 27, 28, 29, 30, 31, 32, 33, 34,
+ 35, 36, 37, 38, 39, 40, 41, 42,
+};
+
static int __init testfunc(void)
{
int buf[6];
- int i;
+ int i, j;
unsigned int ret;
printk(KERN_INFO "int fifo test start\n");
@@ -66,8 +73,13 @@ static int __init testfunc(void)
ret = kfifo_in(&test, buf, ret);
printk(KERN_INFO "ret: %d\n", ret);
- for (i = 20; i != 30; i++)
- kfifo_put(&test, &i);
+ /* skip first element of the fifo */
+ printk(KERN_INFO "skip 1st element\n");
+ kfifo_skip(&test);
+
+ /* put values into the fifo until it is full */
+ for (i = 20; kfifo_put(&test, &i); i++)
+ ;
printk(KERN_INFO "queue len: %u\n", kfifo_len(&test));
@@ -75,10 +87,20 @@ static int __init testfunc(void)
if (kfifo_peek(&test, &i))
printk(KERN_INFO "%d\n", i);
- /* print out all values in the fifo */
- while (kfifo_get(&test, &i))
- printk("%d ", i);
- printk("\n");
+ /* check the correctness of all values in the fifo */
+ j = 0;
+ while (kfifo_get(&test, &i)) {
+ printk(KERN_INFO "item = %d\n", i);
+ if (i != expected_result[j++]) {
+ printk(KERN_WARNING "value mismatch: test failed\n");
+ return -EIO;
+ }
+ }
+ if (j != ARRAY_SIZE(expected_result)) {
+ printk(KERN_WARNING "size mismatch: test failed\n");
+ return -EIO;
+ }
+ printk(KERN_INFO "test passed\n");
return 0;
}
@@ -132,7 +154,12 @@ static int __init example_init(void)
return ret;
}
#endif
- testfunc();
+ if (testfunc() < 0) {
+#ifdef DYNAMIC
+ kfifo_free(&test);
+#endif
+ return -EIO;
+ }
if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) {
#ifdef DYNAMIC
diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c
index 32c6e0b..e68bd16 100644
--- a/samples/kfifo/record-example.c
+++ b/samples/kfifo/record-example.c
@@ -55,6 +55,19 @@ typedef STRUCT_KFIFO_REC_1(FIFO_SIZE) mytest;
static mytest test;
#endif
+static const char *expected_result[] = {
+ "a",
+ "bb",
+ "ccc",
+ "dddd",
+ "eeeee",
+ "ffffff",
+ "ggggggg",
+ "hhhhhhhh",
+ "iiiiiiiii",
+ "jjjjjjjjjj",
+};
+
static int __init testfunc(void)
{
char buf[100];
@@ -75,6 +88,10 @@ static int __init testfunc(void)
kfifo_in(&test, buf, i + 1);
}
+ /* skip first element of the fifo */
+ printk(KERN_INFO "skip 1st element\n");
+ kfifo_skip(&test);
+
printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test));
/* show the first record without removing from the fifo */
@@ -82,11 +99,22 @@ static int __init testfunc(void)
if (ret)
printk(KERN_INFO "%.*s\n", ret, buf);
- /* print out all records in the fifo */
+ /* check the correctness of all values in the fifo */
+ i = 0;
while (!kfifo_is_empty(&test)) {
ret = kfifo_out(&test, buf, sizeof(buf));
- printk(KERN_INFO "%.*s\n", ret, buf);
+ buf[ret] = '\0';
+ printk(KERN_INFO "item = %.*s\n", ret, buf);
+ if (strcmp(buf, expected_result[i++])) {
+ printk(KERN_WARNING "value mismatch: test failed\n");
+ return -EIO;
+ }
+ }
+ if (i != ARRAY_SIZE(expected_result)) {
+ printk(KERN_WARNING "size mismatch: test failed\n");
+ return -EIO;
}
+ printk(KERN_INFO "test passed\n");
return 0;
}
@@ -142,7 +170,12 @@ static int __init example_init(void)
#else
INIT_KFIFO(test);
#endif
- testfunc();
+ if (testfunc() < 0) {
+#ifdef DYNAMIC
+ kfifo_free(&test);
+#endif
+ return -EIO;
+ }
if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) {
#ifdef DYNAMIC
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 0171060..e67f054 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -159,6 +159,7 @@ my $section_regex; # Find the start of a section
my $function_regex; # Find the name of a function
# (return offset and func name)
my $mcount_regex; # Find the call site to mcount (return offset)
+my $mcount_adjust; # Address adjustment to mcount offset
my $alignment; # The .align value to use for $mcount_section
my $section_type; # Section header plus possible alignment command
my $can_use_local = 0; # If we can use local function references
@@ -213,6 +214,7 @@ $section_regex = "Disassembly of section\\s+(\\S+):";
$function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:";
$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$";
$section_type = '@progbits';
+$mcount_adjust = 0;
$type = ".long";
if ($arch eq "x86_64") {
@@ -351,6 +353,9 @@ if ($arch eq "x86_64") {
} elsif ($arch eq "microblaze") {
# Microblaze calls '_mcount' instead of plain 'mcount'.
$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
+} elsif ($arch eq "blackfin") {
+ $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s__mcount\$";
+ $mcount_adjust = -4;
} else {
die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
}
@@ -511,7 +516,7 @@ while () {
}
# is this a call site to mcount? If so, record it to print later
if ($text_found && /$mcount_regex/) {
- push(@offsets, hex $1);
+ push(@offsets, (hex $1) + $mcount_adjust);
}
}
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index d5666d3..f73e2c2 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -607,8 +607,8 @@ static int apparmor_setprocattr(struct task_struct *task, char *name,
return error;
}
-static int apparmor_task_setrlimit(unsigned int resource,
- struct rlimit *new_rlim)
+static int apparmor_task_setrlimit(struct task_struct *task,
+ unsigned int resource, struct rlimit *new_rlim)
{
struct aa_profile *profile = aa_current_profile();
int error = 0;
diff --git a/security/apparmor/path.c b/security/apparmor/path.c
index 96bab94..19358dc 100644
--- a/security/apparmor/path.c
+++ b/security/apparmor/path.c
@@ -62,19 +62,14 @@ static int d_namespace_path(struct path *path, char *buf, int buflen,
int deleted, connected;
int error = 0;
- /* Get the root we want to resolve too */
+ /* Get the root we want to resolve to, released below */
if (flags & PATH_CHROOT_REL) {
/* resolve paths relative to chroot */
- read_lock(¤t->fs->lock);
- root = current->fs->root;
- /* released below */
- path_get(&root);
- read_unlock(¤t->fs->lock);
+ get_fs_root(current->fs, &root);
} else {
/* resolve paths relative to namespace */
root.mnt = current->nsproxy->mnt_ns->root;
root.dentry = root.mnt->mnt_root;
- /* released below */
path_get(&root);
}
diff --git a/security/commoncap.c b/security/commoncap.c
index 4e01599..9d172e6 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -40,7 +40,7 @@
*
* Warn if that happens, once per boot.
*/
-static void warn_setuid_and_fcaps_mixed(char *fname)
+static void warn_setuid_and_fcaps_mixed(const char *fname)
{
static int warned;
if (!warned) {
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 42043f9..4796ddd 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2170,8 +2170,9 @@ static inline void flush_unauthorized_files(const struct cred *cred,
tty = get_current_tty();
if (tty) {
- file_list_lock();
+ spin_lock(&tty_files_lock);
if (!list_empty(&tty->tty_files)) {
+ struct tty_file_private *file_priv;
struct inode *inode;
/* Revalidate access to controlling tty.
@@ -2179,14 +2180,16 @@ static inline void flush_unauthorized_files(const struct cred *cred,
than using file_has_perm, as this particular open
file may belong to another process and we are only
interested in the inode-based check here. */
- file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list);
+ file_priv = list_first_entry(&tty->tty_files,
+ struct tty_file_private, list);
+ file = file_priv->file;
inode = file->f_path.dentry->d_inode;
if (inode_has_perm(cred, inode,
FILE__READ | FILE__WRITE, NULL)) {
drop_tty = 1;
}
}
- file_list_unlock();
+ spin_unlock(&tty_files_lock);
tty_kref_put(tty);
}
/* Reset controlling tty. */
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index a3b2a64..134fc6c 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -978,6 +978,10 @@ static int snd_pcm_do_pause(struct snd_pcm_substream *substream, int push)
{
if (substream->runtime->trigger_master != substream)
return 0;
+ /* some drivers might use hw_ptr to recover from the pause -
+ update the hw_ptr now */
+ if (push)
+ snd_pcm_update_hw_ptr(substream);
/* The jiffies check in snd_pcm_update_hw_ptr*() is done by
* a delta betwen the current jiffies, this gives a large enough
* delta, effectively to skip the check once.
diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c
index 4203782..aff8387 100644
--- a/sound/pci/emu10k1/emu10k1.c
+++ b/sound/pci/emu10k1/emu10k1.c
@@ -52,6 +52,7 @@ static int max_synth_voices[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 64};
static int max_buffer_size[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 128};
static int enable_ir[SNDRV_CARDS];
static uint subsystem[SNDRV_CARDS]; /* Force card subsystem model */
+static uint delay_pcm_irq[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 2};
module_param_array(index, int, NULL, 0444);
MODULE_PARM_DESC(index, "Index value for the EMU10K1 soundcard.");
@@ -73,6 +74,8 @@ module_param_array(enable_ir, bool, NULL, 0444);
MODULE_PARM_DESC(enable_ir, "Enable IR.");
module_param_array(subsystem, uint, NULL, 0444);
MODULE_PARM_DESC(subsystem, "Force card subsystem model.");
+module_param_array(delay_pcm_irq, uint, NULL, 0444);
+MODULE_PARM_DESC(delay_pcm_irq, "Delay PCM interrupt by specified number of samples (default 2).");
/*
* Class 0401: 1102:0008 (rev 00) Subsystem: 1102:1001 -> Audigy2 Value Model:SB0400
*/
@@ -127,6 +130,7 @@ static int __devinit snd_card_emu10k1_probe(struct pci_dev *pci,
&emu)) < 0)
goto error;
card->private_data = emu;
+ emu->delay_pcm_irq = delay_pcm_irq[dev] & 0x1f;
if ((err = snd_emu10k1_pcm(emu, 0, NULL)) < 0)
goto error;
if ((err = snd_emu10k1_pcm_mic(emu, 1, NULL)) < 0)
diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c
index 55b83ef..622bace 100644
--- a/sound/pci/emu10k1/emupcm.c
+++ b/sound/pci/emu10k1/emupcm.c
@@ -332,7 +332,7 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu,
evoice->epcm->ccca_start_addr = start_addr + ccis;
if (extra) {
start_addr += ccis;
- end_addr += ccis;
+ end_addr += ccis + emu->delay_pcm_irq;
}
if (stereo && !extra) {
snd_emu10k1_ptr_write(emu, CPF, voice, CPF_STEREO_MASK);
@@ -360,7 +360,9 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu,
/* Assumption that PT is already 0 so no harm overwriting */
snd_emu10k1_ptr_write(emu, PTRX, voice, (send_amount[0] << 8) | send_amount[1]);
snd_emu10k1_ptr_write(emu, DSL, voice, end_addr | (send_amount[3] << 24));
- snd_emu10k1_ptr_write(emu, PSST, voice, start_addr | (send_amount[2] << 24));
+ snd_emu10k1_ptr_write(emu, PSST, voice,
+ (start_addr + (extra ? emu->delay_pcm_irq : 0)) |
+ (send_amount[2] << 24));
if (emu->card_capabilities->emu_model)
pitch_target = PITCH_48000; /* Disable interpolators on emu1010 card */
else
@@ -732,6 +734,23 @@ static void snd_emu10k1_playback_stop_voice(struct snd_emu10k1 *emu, struct snd_
snd_emu10k1_ptr_write(emu, IP, voice, 0);
}
+static inline void snd_emu10k1_playback_mangle_extra(struct snd_emu10k1 *emu,
+ struct snd_emu10k1_pcm *epcm,
+ struct snd_pcm_substream *substream,
+ struct snd_pcm_runtime *runtime)
+{
+ unsigned int ptr, period_pos;
+
+ /* try to synchronize the current position for the interrupt
+ source voice */
+ period_pos = runtime->status->hw_ptr - runtime->hw_ptr_interrupt;
+ period_pos %= runtime->period_size;
+ ptr = snd_emu10k1_ptr_read(emu, CCCA, epcm->extra->number);
+ ptr &= ~0x00ffffff;
+ ptr |= epcm->ccca_start_addr + period_pos;
+ snd_emu10k1_ptr_write(emu, CCCA, epcm->extra->number, ptr);
+}
+
static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream,
int cmd)
{
@@ -753,6 +772,8 @@ static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream,
/* follow thru */
case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
case SNDRV_PCM_TRIGGER_RESUME:
+ if (cmd == SNDRV_PCM_TRIGGER_PAUSE_RELEASE)
+ snd_emu10k1_playback_mangle_extra(emu, epcm, substream, runtime);
mix = &emu->pcm_mixer[substream->number];
snd_emu10k1_playback_prepare_voice(emu, epcm->voices[0], 1, 0, mix);
snd_emu10k1_playback_prepare_voice(emu, epcm->voices[1], 0, 0, mix);
@@ -869,8 +890,9 @@ static snd_pcm_uframes_t snd_emu10k1_playback_pointer(struct snd_pcm_substream *
#endif
/*
printk(KERN_DEBUG
- "ptr = 0x%x, buffer_size = 0x%x, period_size = 0x%x\n",
- ptr, runtime->buffer_size, runtime->period_size);
+ "ptr = 0x%lx, buffer_size = 0x%lx, period_size = 0x%lx\n",
+ (long)ptr, (long)runtime->buffer_size,
+ (long)runtime->period_size);
*/
return ptr;
}
diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c
index ffb1ddb..957a311 100644
--- a/sound/pci/emu10k1/memory.c
+++ b/sound/pci/emu10k1/memory.c
@@ -310,8 +310,10 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst
if (snd_BUG_ON(!hdr))
return NULL;
+ idx = runtime->period_size >= runtime->buffer_size ?
+ (emu->delay_pcm_irq * 2) : 0;
mutex_lock(&hdr->block_mutex);
- blk = search_empty(emu, runtime->dma_bytes);
+ blk = search_empty(emu, runtime->dma_bytes + idx);
if (blk == NULL) {
mutex_unlock(&hdr->block_mutex);
return NULL;
diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index 31b5d9e..c424952 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c
@@ -3049,6 +3049,7 @@ static struct snd_pci_quirk cxt5066_cfg_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x02f5, "Dell",
CXT5066_DELL_LAPTOP),
SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT5066_OLPC_XO_1_5),
+ SND_PCI_QUIRK(0x1028, 0x02d8, "Dell Vostro", CXT5066_DELL_VOSTO),
SND_PCI_QUIRK(0x1028, 0x0402, "Dell Vostro", CXT5066_DELL_VOSTO),
SND_PCI_QUIRK(0x1028, 0x0408, "Dell Inspiron One 19T", CXT5066_IDEAPAD),
SND_PCI_QUIRK(0x1179, 0xff50, "Toshiba Satellite P500-PSPGSC-01800T", CXT5066_OLPC_XO_1_5),
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 2cd1ae8..a4dd045 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -19030,6 +19030,7 @@ static int patch_alc888(struct hda_codec *codec)
/*
* ALC680 support
*/
+#define ALC680_DIGIN_NID ALC880_DIGIN_NID
#define ALC680_DIGOUT_NID ALC880_DIGOUT_NID
#define alc680_modes alc260_modes
@@ -19044,23 +19045,93 @@ static hda_nid_t alc680_adc_nids[3] = {
0x07, 0x08, 0x09
};
+/*
+ * Analog capture ADC change
+ */
+static int alc680_capture_pcm_prepare(struct hda_pcm_stream *hinfo,
+ struct hda_codec *codec,
+ unsigned int stream_tag,
+ unsigned int format,
+ struct snd_pcm_substream *substream)
+{
+ struct alc_spec *spec = codec->spec;
+ struct auto_pin_cfg *cfg = &spec->autocfg;
+ unsigned int pre_mic, pre_line;
+
+ pre_mic = snd_hda_jack_detect(codec, cfg->input_pins[AUTO_PIN_MIC]);
+ pre_line = snd_hda_jack_detect(codec, cfg->input_pins[AUTO_PIN_LINE]);
+
+ spec->cur_adc_stream_tag = stream_tag;
+ spec->cur_adc_format = format;
+
+ if (pre_mic || pre_line) {
+ if (pre_mic)
+ snd_hda_codec_setup_stream(codec, 0x08, stream_tag, 0,
+ format);
+ else
+ snd_hda_codec_setup_stream(codec, 0x09, stream_tag, 0,
+ format);
+ } else
+ snd_hda_codec_setup_stream(codec, 0x07, stream_tag, 0, format);
+ return 0;
+}
+
+static int alc680_capture_pcm_cleanup(struct hda_pcm_stream *hinfo,
+ struct hda_codec *codec,
+ struct snd_pcm_substream *substream)
+{
+ snd_hda_codec_cleanup_stream(codec, 0x07);
+ snd_hda_codec_cleanup_stream(codec, 0x08);
+ snd_hda_codec_cleanup_stream(codec, 0x09);
+ return 0;
+}
+
+static struct hda_pcm_stream alc680_pcm_analog_auto_capture = {
+ .substreams = 1, /* can be overridden */
+ .channels_min = 2,
+ .channels_max = 2,
+ /* NID is set in alc_build_pcms */
+ .ops = {
+ .prepare = alc680_capture_pcm_prepare,
+ .cleanup = alc680_capture_pcm_cleanup
+ },
+};
+
static struct snd_kcontrol_new alc680_base_mixer[] = {
/* output mixer control */
HDA_CODEC_VOLUME("Front Playback Volume", 0x2, 0x0, HDA_OUTPUT),
HDA_CODEC_MUTE("Front Playback Switch", 0x14, 0x0, HDA_OUTPUT),
HDA_CODEC_VOLUME("Headphone Playback Volume", 0x4, 0x0, HDA_OUTPUT),
HDA_CODEC_MUTE("Headphone Playback Switch", 0x16, 0x0, HDA_OUTPUT),
+ HDA_CODEC_VOLUME("Int Mic Boost", 0x12, 0, HDA_INPUT),
HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT),
+ HDA_CODEC_VOLUME("Line In Boost", 0x19, 0, HDA_INPUT),
{ }
};
-static struct snd_kcontrol_new alc680_capture_mixer[] = {
- HDA_CODEC_VOLUME("Capture Volume", 0x07, 0x0, HDA_INPUT),
- HDA_CODEC_MUTE("Capture Switch", 0x07, 0x0, HDA_INPUT),
- HDA_CODEC_VOLUME_IDX("Capture Volume", 1, 0x08, 0x0, HDA_INPUT),
- HDA_CODEC_MUTE_IDX("Capture Switch", 1, 0x08, 0x0, HDA_INPUT),
- HDA_CODEC_VOLUME_IDX("Capture Volume", 2, 0x09, 0x0, HDA_INPUT),
- HDA_CODEC_MUTE_IDX("Capture Switch", 2, 0x09, 0x0, HDA_INPUT),
+static struct hda_bind_ctls alc680_bind_cap_vol = {
+ .ops = &snd_hda_bind_vol,
+ .values = {
+ HDA_COMPOSE_AMP_VAL(0x07, 3, 0, HDA_INPUT),
+ HDA_COMPOSE_AMP_VAL(0x08, 3, 0, HDA_INPUT),
+ HDA_COMPOSE_AMP_VAL(0x09, 3, 0, HDA_INPUT),
+ 0
+ },
+};
+
+static struct hda_bind_ctls alc680_bind_cap_switch = {
+ .ops = &snd_hda_bind_sw,
+ .values = {
+ HDA_COMPOSE_AMP_VAL(0x07, 3, 0, HDA_INPUT),
+ HDA_COMPOSE_AMP_VAL(0x08, 3, 0, HDA_INPUT),
+ HDA_COMPOSE_AMP_VAL(0x09, 3, 0, HDA_INPUT),
+ 0
+ },
+};
+
+static struct snd_kcontrol_new alc680_master_capture_mixer[] = {
+ HDA_BIND_VOL("Capture Volume", &alc680_bind_cap_vol),
+ HDA_BIND_SW("Capture Switch", &alc680_bind_cap_switch),
{ } /* end */
};
@@ -19068,25 +19139,73 @@ static struct snd_kcontrol_new alc680_capture_mixer[] = {
* generic initialization of ADC, input mixers and output mixers
*/
static struct hda_verb alc680_init_verbs[] = {
- /* Unmute DAC0-1 and set vol = 0 */
- {0x02, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
- {0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
- {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO},
+ {0x02, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+ {0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
+ {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE},
- {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
- {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40},
- {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0},
- {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24},
- {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20},
+ {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
+ {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+ {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT},
+ {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP},
+ {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80},
+ {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN},
{0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
{0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
{0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
{0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
{0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE},
+
+ {0x16, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN},
+ {0x18, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_MIC_EVENT | AC_USRSP_EN},
+
{ }
};
+/* record the HP, speaker, mic and line-in pin NIDs in autocfg for the base preset */
+static void alc680_base_setup(struct hda_codec *codec)
+{
+ struct alc_spec *spec = codec->spec;
+
+ spec->autocfg.hp_pins[0] = 0x16;
+ spec->autocfg.speaker_pins[0] = 0x14;
+ spec->autocfg.speaker_pins[1] = 0x15;
+ spec->autocfg.input_pins[AUTO_PIN_MIC] = 0x18;
+ spec->autocfg.input_pins[AUTO_PIN_LINE] = 0x19;
+}
+
+static void alc680_rec_autoswitch(struct hda_codec *codec)
+{
+ struct alc_spec *spec = codec->spec;
+ struct auto_pin_cfg *cfg = &spec->autocfg;
+ unsigned int present;
+ hda_nid_t new_adc;
+
+ present = snd_hda_jack_detect(codec, cfg->input_pins[AUTO_PIN_MIC]);
+
+ new_adc = present ? 0x8 : 0x7;
+ __snd_hda_codec_cleanup_stream(codec, !present ? 0x8 : 0x7, 1);
+ snd_hda_codec_setup_stream(codec, new_adc,
+ spec->cur_adc_stream_tag, 0,
+ spec->cur_adc_format);
+
+}
+
+static void alc680_unsol_event(struct hda_codec *codec,
+ unsigned int res)
+{
+ if ((res >> 26) == ALC880_HP_EVENT)
+ alc_automute_amp(codec);
+ if ((res >> 26) == ALC880_MIC_EVENT)
+ alc680_rec_autoswitch(codec);
+}
+
+static void alc680_inithook(struct hda_codec *codec)
+{
+ alc_automute_amp(codec);
+ alc680_rec_autoswitch(codec);
+}
+
/* create input playback/capture controls for the given pin */
static int alc680_new_analog_output(struct alc_spec *spec, hda_nid_t nid,
const char *ctlname, int idx)
@@ -19197,13 +19316,7 @@ static void alc680_auto_init_hp_out(struct hda_codec *codec)
#define alc680_pcm_analog_capture alc880_pcm_analog_capture
#define alc680_pcm_analog_alt_capture alc880_pcm_analog_alt_capture
#define alc680_pcm_digital_playback alc880_pcm_digital_playback
-
-static struct hda_input_mux alc680_capture_source = {
- .num_items = 1,
- .items = {
- { "Mic", 0x0 },
- },
-};
+#define alc680_pcm_digital_capture alc880_pcm_digital_capture
/*
* BIOS auto configuration
@@ -19218,6 +19331,7 @@ static int alc680_parse_auto_config(struct hda_codec *codec)
alc680_ignore);
if (err < 0)
return err;
+
if (!spec->autocfg.line_outs) {
if (spec->autocfg.dig_outs || spec->autocfg.dig_in_pin) {
spec->multiout.max_channels = 2;
@@ -19239,8 +19353,6 @@ static int alc680_parse_auto_config(struct hda_codec *codec)
add_mixer(spec, spec->kctls.list);
add_verb(spec, alc680_init_verbs);
- spec->num_mux_defs = 1;
- spec->input_mux = &alc680_capture_source;
err = alc_auto_add_mic_boost(codec);
if (err < 0)
@@ -19279,17 +19391,17 @@ static struct snd_pci_quirk alc680_cfg_tbl[] = {
static struct alc_config_preset alc680_presets[] = {
[ALC680_BASE] = {
.mixers = { alc680_base_mixer },
- .cap_mixer = alc680_capture_mixer,
+ .cap_mixer = alc680_master_capture_mixer,
.init_verbs = { alc680_init_verbs },
.num_dacs = ARRAY_SIZE(alc680_dac_nids),
.dac_nids = alc680_dac_nids,
- .num_adc_nids = ARRAY_SIZE(alc680_adc_nids),
- .adc_nids = alc680_adc_nids,
- .hp_nid = 0x04,
.dig_out_nid = ALC680_DIGOUT_NID,
.num_channel_mode = ARRAY_SIZE(alc680_modes),
.channel_mode = alc680_modes,
- .input_mux = &alc680_capture_source,
+ .unsol_event = alc680_unsol_event,
+ .setup = alc680_base_setup,
+ .init_hook = alc680_inithook,
+
},
};
@@ -19333,9 +19445,9 @@ static int patch_alc680(struct hda_codec *codec)
setup_preset(codec, &alc680_presets[board_config]);
spec->stream_analog_playback = &alc680_pcm_analog_playback;
- spec->stream_analog_capture = &alc680_pcm_analog_capture;
- spec->stream_analog_alt_capture = &alc680_pcm_analog_alt_capture;
+ spec->stream_analog_capture = &alc680_pcm_analog_auto_capture;
spec->stream_digital_playback = &alc680_pcm_digital_playback;
+ spec->stream_digital_capture = &alc680_pcm_digital_capture;
if (!spec->adc_nids) {
spec->adc_nids = alc680_adc_nids;
diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c
index f64fb7d..ad5202e 100644
--- a/sound/pci/riptide/riptide.c
+++ b/sound/pci/riptide/riptide.c
@@ -1224,15 +1224,14 @@ static int try_to_load_firmware(struct cmdif *cif, struct snd_riptide *chip)
firmware.firmware.ASIC, firmware.firmware.CODEC,
firmware.firmware.AUXDSP, firmware.firmware.PROG);
+ if (!chip)
+ return 1;
+
for (i = 0; i < FIRMWARE_VERSIONS; i++) {
if (!memcmp(&firmware_versions[i], &firmware, sizeof(firmware)))
- break;
- }
- if (i >= FIRMWARE_VERSIONS)
- return 0; /* no match */
+ return 1; /* OK */
- if (!chip)
- return 1; /* OK */
+ }
snd_printdd("Writing Firmware\n");
if (!chip->fw_entry) {
diff --git a/sound/soc/codecs/wm8776.c b/sound/soc/codecs/wm8776.c
index 4e212ed..f8154e6 100644
--- a/sound/soc/codecs/wm8776.c
+++ b/sound/soc/codecs/wm8776.c
@@ -178,13 +178,6 @@ static int wm8776_set_fmt(struct snd_soc_dai *dai, unsigned int fmt)
case SND_SOC_DAIFMT_LEFT_J:
iface |= 0x0001;
break;
- /* FIXME: CHECK A/B */
- case SND_SOC_DAIFMT_DSP_A:
- iface |= 0x0003;
- break;
- case SND_SOC_DAIFMT_DSP_B:
- iface |= 0x0007;
- break;
default:
return -EINVAL;
}
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 41abb90..4f1fa77 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -5,6 +5,12 @@ endif
# The default target of this Makefile is...
all::
+ifneq ($(OUTPUT),)
+# check that the output directory actually exists
+OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd)
+$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
+endif
+
# Define V=1 to have a more verbose compile.
# Define V=2 to have an even more verbose compile.
#
@@ -157,10 +163,6 @@ all::
#
# Define NO_DWARF if you do not want debug-info analysis feature at all.
-$(shell sh -c 'mkdir -p $(OUTPUT)scripts/{perl,python}/Perf-Trace-Util/' 2> /dev/null)
-$(shell sh -c 'mkdir -p $(OUTPUT)util/{ui/browsers,scripting-engines}/' 2> /dev/null)
-$(shell sh -c 'mkdir $(OUTPUT)bench' 2> /dev/null)
-
$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
@$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
-include $(OUTPUT)PERF-VERSION-FILE
@@ -186,8 +188,6 @@ ifeq ($(ARCH),x86_64)
ARCH := x86
endif
-$(shell sh -c 'mkdir -p $(OUTPUT)arch/$(ARCH)/util/' 2> /dev/null)
-
# CFLAGS and LDFLAGS are for the users to override from the command line.
#
@@ -268,6 +268,7 @@ export prefix bindir sharedir sysconfdir
CC = $(CROSS_COMPILE)gcc
AR = $(CROSS_COMPILE)ar
RM = rm -f
+MKDIR = mkdir
TAR = tar
FIND = find
INSTALL = install
@@ -838,6 +839,7 @@ ifndef V
QUIET_CC = @echo ' ' CC $@;
QUIET_AR = @echo ' ' AR $@;
QUIET_LINK = @echo ' ' LINK $@;
+ QUIET_MKDIR = @echo ' ' MKDIR $@;
QUIET_BUILT_IN = @echo ' ' BUILTIN $@;
QUIET_GEN = @echo ' ' GEN $@;
QUIET_SUBDIR0 = +@subdir=
@@ -935,15 +937,15 @@ $(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
$(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh
- $(QUIET_GEN)$(RM) $@ $@+ && \
+ $(QUIET_GEN)$(RM) $(OUTPUT)$@ $(OUTPUT)$@+ && \
sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \
-e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \
-e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \
-e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \
-e 's/@@NO_CURL@@/$(NO_CURL)/g' \
- $@.sh >$@+ && \
- chmod +x $@+ && \
- mv $@+ $(OUTPUT)$@
+ $@.sh > $(OUTPUT)$@+ && \
+ chmod +x $(OUTPUT)$@+ && \
+ mv $(OUTPUT)$@+ $(OUTPUT)$@
configure: configure.ac
$(QUIET_GEN)$(RM) $@ $<+ && \
@@ -1012,6 +1014,14 @@ $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
builtin-revert.o wt-status.o: wt-status.h
+# We compile into subdirectories. If the target directory is not the source directory, they might not exist, so
+# we make the various files depend on their directories.
+DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
+$(DIRECTORY_DEPS): $(sort $(dir $(DIRECTORY_DEPS)))
+# In the second step, we make a rule to actually create these directories
+$(sort $(dir $(DIRECTORY_DEPS))):
+ $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null
+
$(LIB_FILE): $(LIB_OBJS)
$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak
index ddb68e6..7a7b608 100644
--- a/tools/perf/feature-tests.mak
+++ b/tools/perf/feature-tests.mak
@@ -113,7 +113,7 @@ endef
# try-cc
# Usage: option = $(call try-cc, source-to-build, cc-options)
try-cc = $(shell sh -c \
- 'TMP="$(TMPOUT).$$$$"; \
+ 'TMP="$(OUTPUT)$(TMPOUT).$$$$"; \
echo "$(1)" | \
$(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \
rm -f "$$TMP"')
diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c
index 55ff792..a90273e 100644
--- a/tools/perf/util/ui/browsers/annotate.c
+++ b/tools/perf/util/ui/browsers/annotate.c
@@ -146,6 +146,7 @@ static int annotate_browser__run(struct annotate_browser *self,
return -1;
newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT);
+ newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT);
nd = self->curr_hot;
if (nd) {
@@ -178,7 +179,7 @@ static int annotate_browser__run(struct annotate_browser *self,
}
out:
ui_browser__hide(&self->b);
- return 0;
+ return es->u.key;
}
int hist_entry__tui_annotate(struct hist_entry *self)