diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 084f6ad..ed64d22 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1645,7 +1645,9 @@ the amount of locking which needs to be done. all the readers who were traversing the list when we deleted the element are finished. We use call_rcu() to register a callback which will actually destroy the object once - the readers are finished. + all pre-existing readers are finished. Alternatively, + synchronize_rcu() may be used to block until + all pre-existing are finished. But how does Read Copy Update know when the readers are @@ -1714,7 +1716,7 @@ the amount of locking which needs to be done. - object_put(obj); + list_del_rcu(&obj->list); cache_num--; -+ call_rcu(&obj->rcu, cache_delete_rcu, obj); ++ call_rcu(&obj->rcu, cache_delete_rcu); } /* Must be holding cache_lock */ @@ -1725,14 +1727,6 @@ the amount of locking which needs to be done. if (++cache_num > MAX_CACHE_SIZE) { struct object *i, *outcast = NULL; list_for_each_entry(i, &cache, list) { -@@ -85,6 +94,7 @@ - obj->popularity = 0; - atomic_set(&obj->refcnt, 1); /* The cache holds a reference */ - spin_lock_init(&obj->lock); -+ INIT_RCU_HEAD(&obj->rcu); - - spin_lock_irqsave(&cache_lock, flags); - __cache_add(obj); @@ -104,12 +114,11 @@ struct object *cache_find(int id) { diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 790d1a8..0c134f8 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -218,13 +218,22 @@ over a rather long period of time, but improvements are always welcome! include: a. Keeping a count of the number of data-structure elements - used by the RCU-protected data structure, including those - waiting for a grace period to elapse. Enforce a limit - on this number, stalling updates as needed to allow - previously deferred frees to complete. - - Alternatively, limit only the number awaiting deferred - free rather than the total number of elements. + used by the RCU-protected data structure, including + those waiting for a grace period to elapse. Enforce a + limit on this number, stalling updates as needed to allow + previously deferred frees to complete. Alternatively, + limit only the number awaiting deferred free rather than + the total number of elements. + + One way to stall the updates is to acquire the update-side + mutex. (Don't try this with a spinlock -- other CPUs + spinning on the lock could prevent the grace period + from ever ending.) Another way to stall the updates + is for the updates to use a wrapper function around + the memory allocator, so that this wrapper function + simulates OOM when there is too much memory awaiting an + RCU grace period. There are of course many other + variations on this theme. b. Limiting update rate. For example, if updates occur only once per hour, then no explicit rate limiting is required, @@ -365,3 +374,26 @@ over a rather long period of time, but improvements are always welcome! and the compiler to freely reorder code into and out of RCU read-side critical sections. It is the responsibility of the RCU update-side primitives to deal with this. + +17. Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and + the __rcu sparse checks to validate your RCU code. These + can help find problems as follows: + + CONFIG_PROVE_RCU: check that accesses to RCU-protected data + structures are carried out under the proper RCU + read-side critical section, while holding the right + combination of locks, or whatever other conditions + are appropriate. + + CONFIG_DEBUG_OBJECTS_RCU_HEAD: check that you don't pass the + same object to call_rcu() (or friends) before an RCU + grace period has elapsed since the last time that you + passed that same object to call_rcu() (or friends). + + __rcu sparse checks: tag the pointer to the RCU-protected data + structure with __rcu, and sparse will warn you if you + access that pointer without the services of one of the + variants of rcu_dereference(). + + These debugging aids can help you find problems that are + otherwise extremely difficult to spot. diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index f6f8025..1565eef 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -1024,6 +1024,10 @@ ThinkPad-specific interface. The driver will disable its native backlight brightness control interface if it detects that the standard ACPI interface is available in the ThinkPad. +If you want to use the thinkpad-acpi backlight brightness control +instead of the generic ACPI video backlight brightness control for some +reason, you should use the acpi_backlight=vendor kernel parameter. + The brightness_enable module parameter can be used to control whether the LCD brightness control feature will be enabled when available. brightness_enable=0 forces it to be disabled. brightness_enable=1 diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index 568fa08..302db5d 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -49,40 +49,13 @@ Table of Contents f) MDIO on GPIOs g) SPI busses - VII - Marvell Discovery mv64[345]6x System Controller chips - 1) The /system-controller node - 2) Child nodes of /system-controller - a) Marvell Discovery MDIO bus - b) Marvell Discovery ethernet controller - c) Marvell Discovery PHY nodes - d) Marvell Discovery SDMA nodes - e) Marvell Discovery BRG nodes - f) Marvell Discovery CUNIT nodes - g) Marvell Discovery MPSCROUTING nodes - h) Marvell Discovery MPSCINTR nodes - i) Marvell Discovery MPSC nodes - j) Marvell Discovery Watch Dog Timer nodes - k) Marvell Discovery I2C nodes - l) Marvell Discovery PIC (Programmable Interrupt Controller) nodes - m) Marvell Discovery MPP (Multipurpose Pins) multiplexing nodes - n) Marvell Discovery GPP (General Purpose Pins) nodes - o) Marvell Discovery PCI host bridge node - p) Marvell Discovery CPU Error nodes - q) Marvell Discovery SRAM Controller nodes - r) Marvell Discovery PCI Error Handler nodes - s) Marvell Discovery Memory Controller nodes - - VIII - Specifying interrupt information for devices + VII - Specifying interrupt information for devices 1) interrupts property 2) interrupt-parent property 3) OpenPIC Interrupt Controllers 4) ISA Interrupt Controllers - IX - Specifying GPIO information for devices - 1) gpios property - 2) gpio-controller nodes - - X - Specifying device power management information (sleep property) + VIII - Specifying device power management information (sleep property) Appendix A - Sample SOC node for MPC8540 diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 88e608a..842dba3 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -387,8 +387,9 @@ EXPORT_SYMBOL(dump_elf_task_fp); * sys_execve() executes a new program. */ asmlinkage int -do_sys_execve(const char __user *ufilename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +do_sys_execve(const char __user *ufilename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { int error; char *filename; diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 99b8200..59c1ce8 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -21,6 +21,9 @@ GZFLAGS :=-9 # Explicitly specifiy 32-bit ARM ISA since toolchain default can be -mthumb: KBUILD_CFLAGS +=$(call cc-option,-marm,) +# Never generate .eh_frame +KBUILD_CFLAGS += $(call cc-option,-fno-dwarf2-cfi-asm) + # Do not use arch/arm/defconfig - it's always outdated. # Select a platform tht is kept up-to-date KBUILD_DEFCONFIG := versatile_defconfig diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index c974be8..7ce15eb 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -158,15 +158,24 @@ struct pt_regs { */ static inline int valid_user_regs(struct pt_regs *regs) { - if (user_mode(regs) && (regs->ARM_cpsr & PSR_I_BIT) == 0) { - regs->ARM_cpsr &= ~(PSR_F_BIT | PSR_A_BIT); - return 1; + unsigned long mode = regs->ARM_cpsr & MODE_MASK; + + /* + * Always clear the F (FIQ) and A (delayed abort) bits + */ + regs->ARM_cpsr &= ~(PSR_F_BIT | PSR_A_BIT); + + if ((regs->ARM_cpsr & PSR_I_BIT) == 0) { + if (mode == USR_MODE) + return 1; + if (elf_hwcap & HWCAP_26BIT && mode == USR26_MODE) + return 1; } /* * Force CPSR to something logical... */ - regs->ARM_cpsr &= PSR_f | PSR_s | (PSR_x & ~PSR_A_BIT) | PSR_T_BIT | MODE32_BIT; + regs->ARM_cpsr &= PSR_f | PSR_s | PSR_x | PSR_T_BIT | MODE32_BIT; if (!(elf_hwcap & HWCAP_26BIT)) regs->ARM_cpsr |= USR_MODE; diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index dd2bf53..d02cfb6 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -392,6 +392,7 @@ #define __NR_rt_tgsigqueueinfo (__NR_SYSCALL_BASE+363) #define __NR_perf_event_open (__NR_SYSCALL_BASE+364) #define __NR_recvmmsg (__NR_SYSCALL_BASE+365) +#define __NR_accept4 (__NR_SYSCALL_BASE+366) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 37ae301..afeb71f 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -375,6 +375,7 @@ CALL(sys_rt_tgsigqueueinfo) CALL(sys_perf_event_open) /* 365 */ CALL(sys_recvmmsg) + CALL(sys_accept4) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 778c2f7..d6e8b4d 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -79,7 +79,7 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) return; /* Initialize to zero */ - for (regno = 0; regno < GDB_MAX_REGS; regno++) + for (regno = 0; regno < DBG_MAX_REG_NUM; regno++) gdb_regs[regno] = 0; /* Otherwise, we have only some registers from switch_to() */ diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 5b7c541..62e7c61 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -62,8 +62,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs) /* sys_execve() executes a new program. * This is called indirectly via a small wrapper */ -asmlinkage int sys_execve(const char __user *filenamei, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +asmlinkage int sys_execve(const char __user *filenamei, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { int error; char * filename; @@ -78,14 +79,17 @@ out: return error; } -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { struct pt_regs regs; int ret; memset(®s, 0, sizeof(struct pt_regs)); - ret = do_execve(filename, (char __user * __user *)argv, - (char __user * __user *)envp, ®s); + ret = do_execve(filename, + (const char __user *const __user *)argv, + (const char __user *const __user *)envp, ®s); if (ret < 0) goto out; diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index e5daddf..9c46aaa 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -384,8 +384,9 @@ asmlinkage int sys_vfork(struct pt_regs *regs) } asmlinkage int sys_execve(const char __user *ufilename, - char __user *__user *uargv, - char __user *__user *uenvp, struct pt_regs *regs) + const char __user *const __user *uargv, + const char __user *const __user *uenvp, + struct pt_regs *regs) { int error; char *filename; diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c index 459349b..62635a0 100644 --- a/arch/avr32/kernel/sys_avr32.c +++ b/arch/avr32/kernel/sys_avr32.c @@ -7,7 +7,9 @@ */ #include -int kernel_execve(const char *file, char **argv, char **envp) +int kernel_execve(const char *file, + const char *const *argv, + const char *const *envp) { register long scno asm("r8") = __NR_execve; register long sc1 asm("r12") = (long)file; diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index a566f61..01f98cb 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -209,7 +209,9 @@ copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char *filename; diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 93f0f64..9a57db6 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -204,7 +204,9 @@ asmlinkage int sys_vfork(long r10, long r11, long r12, long r13, long mof, long /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char *fname, char **argv, char **envp, +asmlinkage int sys_execve(const char *fname, + const char *const *argv, + const char *const *envp, long r13, long mof, long srp, struct pt_regs *regs) { diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 2661a95..562f847 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -218,8 +218,10 @@ sys_vfork(long r10, long r11, long r12, long r13, long mof, long srp, /* sys_execve() executes a new program. */ asmlinkage int -sys_execve(const char *fname, char **argv, char **envp, long r13, long mof, long srp, - struct pt_regs *regs) +sys_execve(const char *fname, + const char *const *argv, + const char *const *envp, long r13, long mof, long srp, + struct pt_regs *regs) { int error; char *filename; diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 428931c..2b63b01 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -250,8 +250,9 @@ int copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char * filename; diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 8b7b78d..9747813 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -212,7 +212,10 @@ int copy_thread(unsigned long clone_flags, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char *name, char **argv, char **envp,int dummy,...) +asmlinkage int sys_execve(const char *name, + const char *const *argv, + const char *const *envp, + int dummy, ...) { int error; char * filename; diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c index f9b3f44..dc1ac02 100644 --- a/arch/h8300/kernel/sys_h8300.c +++ b/arch/h8300/kernel/sys_h8300.c @@ -51,7 +51,9 @@ asmlinkage void syscall_print(void *dummy,...) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long res __asm__("er0"); register char *const *_c __asm__("er3") = envp; diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 87f1bd1..954d398 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -356,8 +356,6 @@ asmlinkage unsigned long sys_mmap2( int fd, long pgoff); struct pt_regs; struct sigaction; -long sys_execve(const char __user *filename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs); asmlinkage long sys_ia64_pipe(void); asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act, diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index a879c03..16f1c7b 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -633,7 +633,9 @@ dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) } long -sys_execve (const char __user *filename, char __user * __user *argv, char __user * __user *envp, +sys_execve (const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { char *fname; diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 8665a4d..422bea9 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -289,8 +289,8 @@ asmlinkage int sys_vfork(unsigned long r0, unsigned long r1, unsigned long r2, * sys_execve() executes a new program. */ asmlinkage int sys_execve(const char __user *ufilename, - char __user * __user *uargv, - char __user * __user *uenvp, + const char __user *const __user *uargv, + const char __user *const __user *uenvp, unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, struct pt_regs regs) { diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c index 0a00f46..d841fb6 100644 --- a/arch/m32r/kernel/sys_m32r.c +++ b/arch/m32r/kernel/sys_m32r.c @@ -93,7 +93,9 @@ asmlinkage int sys_cachectl(char *addr, int nbytes, int op) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __scno __asm__ ("r7") = __NR_execve; register long __arg3 __asm__ ("r2") = (long)(envp); diff --git a/arch/m68k/include/asm/ide.h b/arch/m68k/include/asm/ide.h index 3958726..492fee8 100644 --- a/arch/m68k/include/asm/ide.h +++ b/arch/m68k/include/asm/ide.h @@ -1,6 +1,4 @@ /* - * linux/include/asm-m68k/ide.h - * * Copyright (C) 1994-1996 Linus Torvalds & authors */ @@ -34,6 +32,8 @@ #include #include +#ifdef CONFIG_MMU + /* * Get rid of defs from io.h - ide has its private and conflicting versions * Since so far no single m68k platform uses ISA/PCI I/O space for IDE, we @@ -53,5 +53,14 @@ #define __ide_mm_outsw(port, addr, n) raw_outsw((u16 *)port, addr, n) #define __ide_mm_outsl(port, addr, n) raw_outsl((u32 *)port, addr, n) +#else + +#define __ide_mm_insw(port, addr, n) io_insw((unsigned int)port, addr, n) +#define __ide_mm_insl(port, addr, n) io_insl((unsigned int)port, addr, n) +#define __ide_mm_outsw(port, addr, n) io_outsw((unsigned int)port, addr, n) +#define __ide_mm_outsl(port, addr, n) io_outsl((unsigned int)port, addr, n) + +#endif /* CONFIG_MMU */ + #endif /* __KERNEL__ */ #endif /* _M68K_IDE_H */ diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 221d0b7..18732ab 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -315,7 +315,9 @@ EXPORT_SYMBOL(dump_fpu); /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char __user *name, char __user * __user *argv, char __user * __user *envp) +asmlinkage int sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp) { int error; char * filename; diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c index 7789669..2f431ec 100644 --- a/arch/m68k/kernel/sys_m68k.c +++ b/arch/m68k/kernel/sys_m68k.c @@ -459,7 +459,9 @@ asmlinkage int sys_getpagesize(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __res asm ("%d0") = __NR_execve; register long __a asm ("%d1") = (long)(filename); diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 6350f68..6d33905 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -316,14 +316,14 @@ void dump(struct pt_regs *fp) fp->d0, fp->d1, fp->d2, fp->d3); printk(KERN_EMERG "d4: %08lx d5: %08lx a0: %08lx a1: %08lx\n", fp->d4, fp->d5, fp->a0, fp->a1); - printk(KERN_EMERG "\nUSP: %08x TRAPFRAME: %08x\n", - (unsigned int) rdusp(), (unsigned int) fp); + printk(KERN_EMERG "\nUSP: %08x TRAPFRAME: %p\n", + (unsigned int) rdusp(), fp); printk(KERN_EMERG "\nCODE:"); tp = ((unsigned char *) fp->pc) - 0x20; for (sp = (unsigned long *) tp, i = 0; (i < 0x40); i += 4) { if ((i % 0x10) == 0) - printk(KERN_EMERG "%08x: ", (int) (tp + i)); + printk(KERN_EMERG "%p: ", tp + i); printk("%08x ", (int) *sp++); } printk(KERN_EMERG "\n"); @@ -332,7 +332,7 @@ void dump(struct pt_regs *fp) tp = ((unsigned char *) fp) - 0x40; for (sp = (unsigned long *) tp, i = 0; (i < 0xc0); i += 4) { if ((i % 0x10) == 0) - printk(KERN_EMERG "%08x: ", (int) (tp + i)); + printk(KERN_EMERG "%p: ", tp + i); printk("%08x ", (int) *sp++); } printk(KERN_EMERG "\n"); @@ -341,7 +341,7 @@ void dump(struct pt_regs *fp) tp = (unsigned char *) (rdusp() - 0x10); for (sp = (unsigned long *) tp, i = 0; (i < 0x80); i += 4) { if ((i % 0x10) == 0) - printk(KERN_EMERG "%08x: ", (int) (tp + i)); + printk(KERN_EMERG "%p: ", tp + i); printk("%08x ", (int) *sp++); } printk(KERN_EMERG "\n"); @@ -350,7 +350,9 @@ void dump(struct pt_regs *fp) /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(const char *name, char **argv, char **envp) +asmlinkage int sys_execve(const char *name, + const char *const *argv, + const char *const *envp) { int error; char * filename; diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c index d65e9c4..68488ae 100644 --- a/arch/m68knommu/kernel/sys_m68k.c +++ b/arch/m68knommu/kernel/sys_m68k.c @@ -44,7 +44,9 @@ asmlinkage int sys_getpagesize(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __res asm ("%d0") = __NR_execve; register long __a asm ("%d1") = (long)(filename); diff --git a/arch/microblaze/kernel/prom_parse.c b/arch/microblaze/kernel/prom_parse.c index d33ba17..99d9b61 100644 --- a/arch/microblaze/kernel/prom_parse.c +++ b/arch/microblaze/kernel/prom_parse.c @@ -73,7 +73,7 @@ int of_irq_map_pci(struct pci_dev *pdev, struct of_irq *out_irq) /* We can only get here if we hit a P2P bridge with no node, * let's do standard swizzling and try again */ - lspec = of_irq_pci_swizzle(PCI_SLOT(pdev->devfn), lspec); + lspec = pci_swizzle_interrupt_pin(pdev, lspec); pdev = ppdev; } diff --git a/arch/microblaze/kernel/sys_microblaze.c b/arch/microblaze/kernel/sys_microblaze.c index 6abab6e..2250fe9 100644 --- a/arch/microblaze/kernel/sys_microblaze.c +++ b/arch/microblaze/kernel/sys_microblaze.c @@ -47,8 +47,10 @@ asmlinkage long microblaze_clone(int flags, unsigned long stack, struct pt_regs return do_fork(flags, stack, regs, 0, NULL, NULL); } -asmlinkage long microblaze_execve(const char __user *filenamei, char __user *__user *argv, - char __user *__user *envp, struct pt_regs *regs) +asmlinkage long microblaze_execve(const char __user *filenamei, + const char __user *const __user *argv, + const char __user *const __user *envp, + struct pt_regs *regs) { int error; char *filename; @@ -77,7 +79,9 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register const char *__a __asm__("r5") = filename; register const void *__b __asm__("r6") = argv; diff --git a/arch/microblaze/pci/pci-common.c b/arch/microblaze/pci/pci-common.c index 23be25f..55ef532 100644 --- a/arch/microblaze/pci/pci-common.c +++ b/arch/microblaze/pci/pci-common.c @@ -27,10 +27,11 @@ #include #include #include +#include +#include #include #include -#include #include #include @@ -1077,7 +1078,7 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) struct dev_archdata *sd = &dev->dev.archdata; /* Setup OF node pointer in archdata */ - sd->of_node = pci_device_to_OF_node(dev); + dev->dev.of_node = pci_device_to_OF_node(dev); /* Fixup NUMA node as it may not be setup yet by the generic * code and is needed by the DMA init diff --git a/arch/microblaze/pci/xilinx_pci.c b/arch/microblaze/pci/xilinx_pci.c index 7869a41..0687a42 100644 --- a/arch/microblaze/pci/xilinx_pci.c +++ b/arch/microblaze/pci/xilinx_pci.c @@ -16,6 +16,7 @@ #include #include +#include #include #include diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index bddce0b..1dc6edf 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -258,8 +258,10 @@ asmlinkage int sys_execve(nabi_no_regargs struct pt_regs regs) error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user *__user *) (long)regs.regs[5], - (char __user *__user *) (long)regs.regs[6], ®s); + error = do_execve(filename, + (const char __user *const __user *) (long)regs.regs[5], + (const char __user *const __user *) (long)regs.regs[6], + ®s); putname(filename); out: @@ -436,7 +438,9 @@ asmlinkage void bad_stack(void) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __a0 asm("$4") = (unsigned long) filename; register unsigned long __a1 asm("$5") = (unsigned long) argv; diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 762eb32..f48373e 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -269,8 +269,8 @@ asmlinkage long sys_vfork(void) } asmlinkage long sys_execve(const char __user *name, - char __user * __user *argv, - char __user * __user *envp) + const char __user *const __user *argv, + const char __user *const __user *envp) { char *filename; int error; diff --git a/arch/parisc/hpux/fs.c b/arch/parisc/hpux/fs.c index 1444875..0dc8543 100644 --- a/arch/parisc/hpux/fs.c +++ b/arch/parisc/hpux/fs.c @@ -41,8 +41,10 @@ int hpux_execve(struct pt_regs *regs) if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user * __user *) regs->gr[25], - (char __user * __user *) regs->gr[24], regs); + error = do_execve(filename, + (const char __user *const __user *) regs->gr[25], + (const char __user *const __user *) regs->gr[24], + regs); putname(filename); diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 76332da..4b4b918 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -348,17 +348,22 @@ asmlinkage int sys_execve(struct pt_regs *regs) error = PTR_ERR(filename); if (IS_ERR(filename)) goto out; - error = do_execve(filename, (char __user * __user *) regs->gr[25], - (char __user * __user *) regs->gr[24], regs); + error = do_execve(filename, + (const char __user *const __user *) regs->gr[25], + (const char __user *const __user *) regs->gr[24], + regs); putname(filename); out: return error; } -extern int __execve(const char *filename, char *const argv[], - char *const envp[], struct task_struct *task); -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +extern int __execve(const char *filename, + const char *const argv[], + const char *const envp[], struct task_struct *task); +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { return __execve(filename, argv, envp, current); } diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 049dda6..39a2baa 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -94,9 +94,9 @@ void cpu_idle(void) HMT_medium(); ppc64_runlatch_on(); tick_nohz_restart_sched_tick(); + preempt_enable_no_resched(); if (cpu_should_die()) cpu_die(); - preempt_enable_no_resched(); schedule(); preempt_disable(); } diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index feacfb7..91356ff 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1034,8 +1034,9 @@ int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, flush_fp_to_thread(current); flush_altivec_to_thread(current); flush_spe_to_thread(current); - error = do_execve(filename, (char __user * __user *) a1, - (char __user * __user *) a2, regs); + error = do_execve(filename, + (const char __user *const __user *) a1, + (const char __user *const __user *) a2, regs); putname(filename); out: return error; diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 7eafaf2..d3a2d1c 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -267,8 +267,9 @@ asmlinkage void execve_tail(void) /* * sys_execve() executes a new program. */ -SYSCALL_DEFINE3(execve, const char __user *, name, char __user * __user *, argv, - char __user * __user *, envp) +SYSCALL_DEFINE3(execve, const char __user *, name, + const char __user *const __user *, argv, + const char __user *const __user *, envp) { struct pt_regs *regs = task_pt_regs(current); char *filename; diff --git a/arch/score/kernel/sys_score.c b/arch/score/kernel/sys_score.c index 651096f..e478bf9 100644 --- a/arch/score/kernel/sys_score.c +++ b/arch/score/kernel/sys_score.c @@ -99,8 +99,10 @@ score_execve(struct pt_regs *regs) if (IS_ERR(filename)) return error; - error = do_execve(filename, (char __user *__user*)regs->regs[5], - (char __user *__user *) regs->regs[6], regs); + error = do_execve(filename, + (const char __user *const __user *)regs->regs[5], + (const char __user *const __user *)regs->regs[6], + regs); putname(filename); return error; @@ -110,7 +112,9 @@ score_execve(struct pt_regs *regs) * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __r4 asm("r4") = (unsigned long) filename; register unsigned long __r5 asm("r5") = (unsigned long) argv; diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 0529819..762a139 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -296,9 +296,10 @@ asmlinkage int sys_vfork(unsigned long r4, unsigned long r5, /* * sys_execve() executes a new program. */ -asmlinkage int sys_execve(char __user *ufilename, char __user * __user *uargv, - char __user * __user *uenvp, unsigned long r7, - struct pt_regs __regs) +asmlinkage int sys_execve(const char __user *ufilename, + const char __user *const __user *uargv, + const char __user *const __user *uenvp, + unsigned long r7, struct pt_regs __regs) { struct pt_regs *regs = RELOC_HIDE(&__regs, 0); int error; diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 68d128d..210c1ca 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -497,8 +497,8 @@ asmlinkage int sys_execve(const char *ufilename, char **uargv, goto out; error = do_execve(filename, - (char __user * __user *)uargv, - (char __user * __user *)uenvp, + (const char __user *const __user *)uargv, + (const char __user *const __user *)uenvp, pregs); putname(filename); out: diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index eb68bfd..f56b6fe 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -71,7 +71,9 @@ asmlinkage int sys_fadvise64_64_wrapper(int fd, u32 offset0, u32 offset1, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register long __sc0 __asm__ ("r3") = __NR_execve; register long __sc4 __asm__ ("r4") = (long) filename; diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c index 2872357..c5a38c4 100644 --- a/arch/sh/kernel/sys_sh64.c +++ b/arch/sh/kernel/sys_sh64.c @@ -33,7 +33,9 @@ * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_execve); register unsigned long __sc2 __asm__ ("r2") = (unsigned long) filename; diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index 2050ca0..f0c7422 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -25,9 +25,9 @@ extern void atomic_sub(int, atomic_t *); extern void atomic64_sub(int, atomic64_t *); extern int atomic_add_ret(int, atomic_t *); -extern int atomic64_add_ret(int, atomic64_t *); +extern long atomic64_add_ret(int, atomic64_t *); extern int atomic_sub_ret(int, atomic_t *); -extern int atomic64_sub_ret(int, atomic64_t *); +extern long atomic64_sub_ret(int, atomic64_t *); #define atomic_dec_return(v) atomic_sub_ret(1, v) #define atomic64_dec_return(v) atomic64_sub_ret(1, v) @@ -91,7 +91,7 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) ((__typeof__((v)->counter))cmpxchg(&((v)->counter), (o), (n))) #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) -static inline int atomic64_add_unless(atomic64_t *v, long a, long u) +static inline long atomic64_add_unless(atomic64_t *v, long a, long u) { long c, old; c = atomic64_read(v); diff --git a/arch/sparc/include/asm/fb.h b/arch/sparc/include/asm/fb.h index e834880..2173432 100644 --- a/arch/sparc/include/asm/fb.h +++ b/arch/sparc/include/asm/fb.h @@ -1,5 +1,6 @@ #ifndef _SPARC_FB_H_ #define _SPARC_FB_H_ +#include #include #include #include @@ -18,6 +19,9 @@ static inline int fb_is_primary_device(struct fb_info *info) struct device *dev = info->device; struct device_node *node; + if (console_set_on_cmdline) + return 0; + node = dev->of_node; if (node && node == of_console_device) diff --git a/arch/sparc/include/asm/rwsem-const.h b/arch/sparc/include/asm/rwsem-const.h index a303c9d..e4c61a1 100644 --- a/arch/sparc/include/asm/rwsem-const.h +++ b/arch/sparc/include/asm/rwsem-const.h @@ -5,7 +5,7 @@ #define RWSEM_UNLOCKED_VALUE 0x00000000 #define RWSEM_ACTIVE_BIAS 0x00000001 #define RWSEM_ACTIVE_MASK 0x0000ffff -#define RWSEM_WAITING_BIAS 0xffff0000 +#define RWSEM_WAITING_BIAS (-0x00010000) #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index d0b3b01..03eb5a8 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -397,8 +397,11 @@ #define __NR_rt_tgsigqueueinfo 326 #define __NR_perf_event_open 327 #define __NR_recvmmsg 328 +#define __NR_fanotify_init 329 +#define __NR_fanotify_mark 330 +#define __NR_prlimit64 331 -#define NR_syscalls 329 +#define NR_syscalls 332 #ifdef __32bit_syscall_numbers__ /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 40e29fc..1752929 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -633,8 +633,10 @@ asmlinkage int sparc_execve(struct pt_regs *regs) if(IS_ERR(filename)) goto out; error = do_execve(filename, - (char __user * __user *)regs->u_regs[base + UREG_I1], - (char __user * __user *)regs->u_regs[base + UREG_I2], + (const char __user *const __user *) + regs->u_regs[base + UREG_I1], + (const char __user *const __user *) + regs->u_regs[base + UREG_I2], regs); putname(filename); out: diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index dbe81a3..485f547 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -739,9 +739,9 @@ asmlinkage int sparc_execve(struct pt_regs *regs) if (IS_ERR(filename)) goto out; error = do_execve(filename, - (char __user * __user *) + (const char __user *const __user *) regs->u_regs[base + UREG_I1], - (char __user * __user *) + (const char __user *const __user *) regs->u_regs[base + UREG_I2], regs); putname(filename); if (!error) { diff --git a/arch/sparc/kernel/sys32.S b/arch/sparc/kernel/sys32.S index 46a76ba..44e5faf 100644 --- a/arch/sparc/kernel/sys32.S +++ b/arch/sparc/kernel/sys32.S @@ -330,6 +330,15 @@ do_sys_accept4: /* sys_accept4(int, struct sockaddr *, int *, int) */ nop nop + .globl sys32_fanotify_mark +sys32_fanotify_mark: + sethi %hi(sys_fanotify_mark), %g1 + sllx %o2, 32, %o2 + or %o2, %o3, %o2 + mov %o4, %o3 + jmpl %g1 + %lo(sys_fanotify_mark), %g0 + mov %o5, %o4 + .section __ex_table,"a" .align 4 .word 1b, __retl_efault, 2b, __retl_efault diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index ee995b7..5079413 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -282,7 +282,9 @@ out: * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; register long __g1 __asm__ ("g1") = __NR_execve; diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 3d435c4..f836f4e 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -758,7 +758,9 @@ SYSCALL_DEFINE5(rt_sigaction, int, sig, const struct sigaction __user *, act, * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; register long __g1 __asm__ ("g1") = __NR_execve; diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 801fc8e..ec396e1 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -82,5 +82,6 @@ sys_call_table: /*310*/ .long sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate /*315*/ .long sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv -/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg +/*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init +/*330*/ .long sys_fanotify_mark, sys_prlimit64 diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 9db058d..8cfcaa5 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -83,7 +83,8 @@ sys_call_table32: /*310*/ .word compat_sys_utimensat, compat_sys_signalfd, sys_timerfd_create, sys_eventfd, compat_sys_fallocate .word compat_sys_timerfd_settime, compat_sys_timerfd_gettime, compat_sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv - .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg + .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init +/*330*/ .word sys32_fanotify_mark, sys_prlimit64 #endif /* CONFIG_COMPAT */ @@ -158,4 +159,5 @@ sys_call_table: /*310*/ .word sys_utimensat, sys_signalfd, sys_timerfd_create, sys_eventfd, sys_fallocate .word sys_timerfd_settime, sys_timerfd_gettime, sys_signalfd4, sys_eventfd2, sys_epoll_create1 /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv - .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg + .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init +/*330*/ .word sys_fanotify_mark, sys_prlimit64 diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index ed590ad..985cc28 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -543,8 +543,9 @@ long _sys_vfork(struct pt_regs *regs) /* * sys_execve() executes a new program. */ -long _sys_execve(char __user *path, char __user *__user *argv, - char __user *__user *envp, struct pt_regs *regs) +long _sys_execve(const char __user *path, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { long error; char *filename; diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 59b20d9..cd145ed 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -44,8 +44,9 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp) PT_REGS_SP(regs) = esp; } -static long execve1(const char *file, char __user * __user *argv, - char __user *__user *env) +static long execve1(const char *file, + const char __user *const __user *argv, + const char __user *const __user *env) { long error; diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c index 7427c0b..5ddb246 100644 --- a/arch/um/kernel/syscall.c +++ b/arch/um/kernel/syscall.c @@ -51,7 +51,9 @@ long old_mmap(unsigned long addr, unsigned long len, return err; } -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { mm_segment_t fs; int ret; diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index feb2ff9..f1d8b44 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h @@ -23,8 +23,9 @@ long sys_iopl(unsigned int, struct pt_regs *); /* kernel/process.c */ int sys_fork(struct pt_regs *); int sys_vfork(struct pt_regs *); -long sys_execve(const char __user *, char __user * __user *, - char __user * __user *, struct pt_regs *); +long sys_execve(const char __user *, + const char __user *const __user *, + const char __user *const __user *, struct pt_regs *); long sys_clone(unsigned long, unsigned long, void __user *, void __user *, struct pt_regs *); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index ef10940..852b819 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -194,7 +194,7 @@ static struct hw_breakpoint { unsigned long addr; int len; int type; - struct perf_event **pev; + struct perf_event * __percpu *pev; } breakinfo[HBP_NUM]; static unsigned long early_dr7; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 64ecaf0..57d1868 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -301,8 +301,9 @@ EXPORT_SYMBOL(kernel_thread); /* * sys_execve() executes a new program. */ -long sys_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +long sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { long error; char *filename; diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 196552b..d5e0662 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c @@ -28,7 +28,9 @@ * Do a system call from kernel instead of calling sys_execve so we * end up with proper pt_regs. */ -int kernel_execve(const char *filename, char *const argv[], char *const envp[]) +int kernel_execve(const char *filename, + const char *const argv[], + const char *const envp[]) { long __res; asm volatile ("push %%ebx ; movl %2,%%ebx ; int $0x80 ; pop %%ebx" diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 7c2f38f..e3558b9 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -318,8 +318,9 @@ long xtensa_clone(unsigned long clone_flags, unsigned long newsp, */ asmlinkage -long xtensa_execve(const char __user *name, char __user * __user *argv, - char __user * __user *envp, +long xtensa_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp, long a3, long a4, long a5, struct pt_regs *regs) { diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index ea24c1e..2673a3d 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -1588,7 +1588,7 @@ static const struct ata_port_info sata_dwc_port_info[] = { }, }; -static int sata_dwc_probe(struct of_device *ofdev, +static int sata_dwc_probe(struct platform_device *ofdev, const struct of_device_id *match) { struct sata_dwc_device *hsdev; @@ -1702,7 +1702,7 @@ error_out: return err; } -static int sata_dwc_remove(struct of_device *ofdev) +static int sata_dwc_remove(struct platform_device *ofdev) { struct device *dev = &ofdev->dev; struct ata_host *host = dev_get_drvdata(dev); diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 2982b3e..057413b 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -94,6 +94,7 @@ #include #include #if defined(CONFIG_OF) +#include #include #include #endif diff --git a/drivers/char/pty.c b/drivers/char/pty.c index ad46eae..c350d01 100644 --- a/drivers/char/pty.c +++ b/drivers/char/pty.c @@ -675,8 +675,8 @@ static int ptmx_open(struct inode *inode, struct file *filp) } set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */ - filp->private_data = tty; - file_move(filp, &tty->tty_files); + + tty_add_file(tty, filp); retval = devpts_pty_new(inode, tty->link); if (retval) diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 0350c42..949067a 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -136,6 +136,9 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ DEFINE_MUTEX(tty_mutex); EXPORT_SYMBOL(tty_mutex); +/* Spinlock to protect the tty->tty_files list */ +DEFINE_SPINLOCK(tty_files_lock); + static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); static ssize_t tty_write(struct file *, const char __user *, size_t, loff_t *); ssize_t redirected_tty_write(struct file *, const char __user *, @@ -185,6 +188,41 @@ void free_tty_struct(struct tty_struct *tty) kfree(tty); } +static inline struct tty_struct *file_tty(struct file *file) +{ + return ((struct tty_file_private *)file->private_data)->tty; +} + +/* Associate a new file with the tty structure */ +void tty_add_file(struct tty_struct *tty, struct file *file) +{ + struct tty_file_private *priv; + + /* XXX: must implement proper error handling in callers */ + priv = kmalloc(sizeof(*priv), GFP_KERNEL|__GFP_NOFAIL); + + priv->tty = tty; + priv->file = file; + file->private_data = priv; + + spin_lock(&tty_files_lock); + list_add(&priv->list, &tty->tty_files); + spin_unlock(&tty_files_lock); +} + +/* Delete file from its tty */ +void tty_del_file(struct file *file) +{ + struct tty_file_private *priv = file->private_data; + + spin_lock(&tty_files_lock); + list_del(&priv->list); + spin_unlock(&tty_files_lock); + file->private_data = NULL; + kfree(priv); +} + + #define TTY_NUMBER(tty) ((tty)->index + (tty)->driver->name_base) /** @@ -235,11 +273,11 @@ static int check_tty_count(struct tty_struct *tty, const char *routine) struct list_head *p; int count = 0; - file_list_lock(); + spin_lock(&tty_files_lock); list_for_each(p, &tty->tty_files) { count++; } - file_list_unlock(); + spin_unlock(&tty_files_lock); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_SLAVE && tty->link && tty->link->count) @@ -497,6 +535,7 @@ void __tty_hangup(struct tty_struct *tty) struct file *cons_filp = NULL; struct file *filp, *f = NULL; struct task_struct *p; + struct tty_file_private *priv; int closecount = 0, n; unsigned long flags; int refs = 0; @@ -506,7 +545,7 @@ void __tty_hangup(struct tty_struct *tty) spin_lock(&redirect_lock); - if (redirect && redirect->private_data == tty) { + if (redirect && file_tty(redirect) == tty) { f = redirect; redirect = NULL; } @@ -519,9 +558,10 @@ void __tty_hangup(struct tty_struct *tty) workqueue with the lock held */ check_tty_count(tty, "tty_hangup"); - file_list_lock(); + spin_lock(&tty_files_lock); /* This breaks for file handles being sent over AF_UNIX sockets ? */ - list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) { + list_for_each_entry(priv, &tty->tty_files, list) { + filp = priv->file; if (filp->f_op->write == redirected_tty_write) cons_filp = filp; if (filp->f_op->write != tty_write) @@ -530,7 +570,7 @@ void __tty_hangup(struct tty_struct *tty) __tty_fasync(-1, filp, 0); /* can't block */ filp->f_op = &hung_up_tty_fops; } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_ldisc_hangup(tty); @@ -889,12 +929,10 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { int i; - struct tty_struct *tty; - struct inode *inode; + struct inode *inode = file->f_path.dentry->d_inode; + struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; - tty = file->private_data; - inode = file->f_path.dentry->d_inode; if (tty_paranoia_check(tty, inode, "tty_read")) return -EIO; if (!tty || (test_bit(TTY_IO_ERROR, &tty->flags))) @@ -1065,12 +1103,11 @@ void tty_write_message(struct tty_struct *tty, char *msg) static ssize_t tty_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct tty_struct *tty; struct inode *inode = file->f_path.dentry->d_inode; + struct tty_struct *tty = file_tty(file); + struct tty_ldisc *ld; ssize_t ret; - struct tty_ldisc *ld; - tty = file->private_data; if (tty_paranoia_check(tty, inode, "tty_write")) return -EIO; if (!tty || !tty->ops->write || @@ -1424,9 +1461,9 @@ static void release_one_tty(struct work_struct *work) tty_driver_kref_put(driver); module_put(driver->owner); - file_list_lock(); + spin_lock(&tty_files_lock); list_del_init(&tty->tty_files); - file_list_unlock(); + spin_unlock(&tty_files_lock); put_pid(tty->pgrp); put_pid(tty->session); @@ -1507,13 +1544,13 @@ static void release_tty(struct tty_struct *tty, int idx) int tty_release(struct inode *inode, struct file *filp) { - struct tty_struct *tty, *o_tty; + struct tty_struct *tty = file_tty(filp); + struct tty_struct *o_tty; int pty_master, tty_closing, o_tty_closing, do_sleep; int devpts; int idx; char buf[64]; - tty = filp->private_data; if (tty_paranoia_check(tty, inode, "tty_release_dev")) return 0; @@ -1671,8 +1708,7 @@ int tty_release(struct inode *inode, struct file *filp) * - do_tty_hangup no longer sees this file descriptor as * something that needs to be handled for hangups. */ - file_kill(filp); - filp->private_data = NULL; + tty_del_file(filp); /* * Perform some housekeeping before deciding whether to return. @@ -1839,8 +1875,8 @@ got_driver: return PTR_ERR(tty); } - filp->private_data = tty; - file_move(filp, &tty->tty_files); + tty_add_file(tty, filp); + check_tty_count(tty, "tty_open"); if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) @@ -1916,11 +1952,10 @@ got_driver: static unsigned int tty_poll(struct file *filp, poll_table *wait) { - struct tty_struct *tty; + struct tty_struct *tty = file_tty(filp); struct tty_ldisc *ld; int ret = 0; - tty = filp->private_data; if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_poll")) return 0; @@ -1933,11 +1968,10 @@ static unsigned int tty_poll(struct file *filp, poll_table *wait) static int __tty_fasync(int fd, struct file *filp, int on) { - struct tty_struct *tty; + struct tty_struct *tty = file_tty(filp); unsigned long flags; int retval = 0; - tty = filp->private_data; if (tty_paranoia_check(tty, filp->f_path.dentry->d_inode, "tty_fasync")) goto out; @@ -2491,13 +2525,13 @@ EXPORT_SYMBOL(tty_pair_get_pty); */ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct tty_struct *tty, *real_tty; + struct tty_struct *tty = file_tty(file); + struct tty_struct *real_tty; void __user *p = (void __user *)arg; int retval; struct tty_ldisc *ld; struct inode *inode = file->f_dentry->d_inode; - tty = file->private_data; if (tty_paranoia_check(tty, inode, "tty_ioctl")) return -EINVAL; @@ -2619,7 +2653,7 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file->f_dentry->d_inode; - struct tty_struct *tty = file->private_data; + struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; int retval = -ENOIOCTLCMD; @@ -2711,7 +2745,7 @@ void __do_SAK(struct tty_struct *tty) if (!filp) continue; if (filp->f_op->read == tty_read && - filp->private_data == tty) { + file_tty(filp) == tty) { printk(KERN_NOTICE "SAK: killed process %d" " (%s): fd#%d opened to the tty\n", task_pid_nr(p), p->comm, i); diff --git a/drivers/char/vt.c b/drivers/char/vt.c index c734f9b..50590c7 100644 --- a/drivers/char/vt.c +++ b/drivers/char/vt.c @@ -194,10 +194,11 @@ static DECLARE_WORK(console_work, console_callback); int fg_console; int last_console; int want_console = -1; -int saved_fg_console; -int saved_last_console; -int saved_want_console; -int saved_vc_mode; +static int saved_fg_console; +static int saved_last_console; +static int saved_want_console; +static int saved_vc_mode; +static int saved_console_blanked; /* * For each existing display, we have a pointer to console currently visible @@ -3449,6 +3450,7 @@ int con_debug_enter(struct vc_data *vc) saved_last_console = last_console; saved_want_console = want_console; saved_vc_mode = vc->vc_mode; + saved_console_blanked = console_blanked; vc->vc_mode = KD_TEXT; console_blanked = 0; if (vc->vc_sw->con_debug_enter) @@ -3492,6 +3494,7 @@ int con_debug_leave(void) fg_console = saved_fg_console; last_console = saved_last_console; want_console = saved_want_console; + console_blanked = saved_console_blanked; vc_cons[fg_console].d->vc_mode = saved_vc_mode; vc = vc_cons[fg_console].d; diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c index 0ed763c..b663d57 100644 --- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c +++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c @@ -94,6 +94,7 @@ #ifdef CONFIG_OF /* For open firmware. */ +#include #include #include #endif diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e635199..0c52899 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1299,6 +1299,7 @@ static const struct hid_device_id hid_blacklist[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CYPRESS, USB_DEVICE_ID_CYPRESS_MOUSE) }, { HID_USB_DEVICE(USB_VENDOR_ID_DRAGONRISE, 0x0006) }, { HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH) }, + { HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH1) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_ELECOM, USB_DEVICE_ID_ELECOM_BM084) }, { HID_USB_DEVICE(USB_VENDOR_ID_EZKEY, USB_DEVICE_ID_BTC_8193) }, { HID_USB_DEVICE(USB_VENDOR_ID_GAMERON, USB_DEVICE_ID_GAMERON_DUAL_PSX_ADAPTOR) }, diff --git a/drivers/hid/hid-egalax.c b/drivers/hid/hid-egalax.c index f44bdc0..8ca7f65 100644 --- a/drivers/hid/hid-egalax.c +++ b/drivers/hid/hid-egalax.c @@ -159,6 +159,13 @@ static int egalax_event(struct hid_device *hid, struct hid_field *field, { struct egalax_data *td = hid_get_drvdata(hid); + /* Note, eGalax has two product lines: the first is resistive and + * uses a standard parallel multitouch protocol (product ID == + * 48xx). The second is capacitive and uses an unusual "serial" + * protocol with a different message for each multitouch finger + * (product ID == 72xx). We do not yet generate a correct event + * sequence for the capacitive/serial protocol. + */ if (hid->claimed & HID_CLAIMED_INPUT) { struct input_dev *input = field->hidinput->input; @@ -246,6 +253,8 @@ static void egalax_remove(struct hid_device *hdev) static const struct hid_device_id egalax_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_DWAV, USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH) }, + { HID_USB_DEVICE(USB_VENDOR_ID_DWAV, + USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH1) }, { } }; MODULE_DEVICE_TABLE(hid, egalax_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index d3fc13a..85c6d13 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -188,6 +188,7 @@ #define USB_VENDOR_ID_DWAV 0x0eef #define USB_DEVICE_ID_EGALAX_TOUCHCONTROLLER 0x0001 #define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH 0x480d +#define USB_DEVICE_ID_DWAV_EGALAX_MULTITOUCH1 0x720c #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 diff --git a/drivers/hid/hid-picolcd.c b/drivers/hid/hid-picolcd.c index 346f0e3..bc2e077 100644 --- a/drivers/hid/hid-picolcd.c +++ b/drivers/hid/hid-picolcd.c @@ -547,11 +547,11 @@ static void picolcd_fb_destroy(struct fb_info *info) ref_cnt--; mutex_lock(&info->lock); (*ref_cnt)--; - may_release = !ref_cnt; + may_release = !*ref_cnt; mutex_unlock(&info->lock); if (may_release) { - framebuffer_release(info); vfree((u8 *)info->fix.smem_start); + framebuffer_release(info); } } diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 254a003..0a29c51 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -266,13 +266,15 @@ static int hiddev_open(struct inode *inode, struct file *file) { struct hiddev_list *list; struct usb_interface *intf; + struct hid_device *hid; struct hiddev *hiddev; int res; intf = usb_find_interface(&hiddev_driver, iminor(inode)); if (!intf) return -ENODEV; - hiddev = usb_get_intfdata(intf); + hid = usb_get_intfdata(intf); + hiddev = hid->hiddev; if (!(list = kzalloc(sizeof(struct hiddev_list), GFP_KERNEL))) return -ENOMEM; @@ -587,7 +589,7 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct hiddev_list *list = file->private_data; struct hiddev *hiddev = list->hiddev; struct hid_device *hid = hiddev->hid; - struct usb_device *dev = hid_to_usb_dev(hid); + struct usb_device *dev; struct hiddev_collection_info cinfo; struct hiddev_report_info rinfo; struct hiddev_field_info finfo; @@ -601,9 +603,11 @@ static long hiddev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) /* Called without BKL by compat methods so no BKL taken */ /* FIXME: Who or what stop this racing with a disconnect ?? */ - if (!hiddev->exist) + if (!hiddev->exist || !hid) return -EIO; + dev = hid_to_usb_dev(hid); + switch (cmd) { case HIDIOCGVERSION: @@ -888,7 +892,6 @@ int hiddev_connect(struct hid_device *hid, unsigned int force) hid->hiddev = hiddev; hiddev->hid = hid; hiddev->exist = 1; - usb_set_intfdata(usbhid->intf, usbhid); retval = usb_register_dev(usbhid->intf, &hiddev_class); if (retval) { err_hid("Not able to get a minor for this device."); diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index c908c5f..5808731 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -28,7 +28,7 @@ struct evdev { int minor; struct input_handle handle; wait_queue_head_t wait; - struct evdev_client *grab; + struct evdev_client __rcu *grab; struct list_head client_list; spinlock_t client_lock; /* protects client_list */ struct mutex mutex; diff --git a/drivers/md/md.c b/drivers/md/md.c index 11567c7..c148b63 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2136,16 +2136,6 @@ static void sync_sbs(mddev_t * mddev, int nospares) * with the rest of the array) */ mdk_rdev_t *rdev; - - /* First make sure individual recovery_offsets are correct */ - list_for_each_entry(rdev, &mddev->disks, same_set) { - if (rdev->raid_disk >= 0 && - mddev->delta_disks >= 0 && - !test_bit(In_sync, &rdev->flags) && - mddev->curr_resync_completed > rdev->recovery_offset) - rdev->recovery_offset = mddev->curr_resync_completed; - - } list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->sb_events == mddev->events || (nospares && @@ -2167,12 +2157,27 @@ static void md_update_sb(mddev_t * mddev, int force_change) int sync_req; int nospares = 0; - mddev->utime = get_seconds(); - if (mddev->external) - return; repeat: + /* First make sure individual recovery_offsets are correct */ + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (rdev->raid_disk >= 0 && + mddev->delta_disks >= 0 && + !test_bit(In_sync, &rdev->flags) && + mddev->curr_resync_completed > rdev->recovery_offset) + rdev->recovery_offset = mddev->curr_resync_completed; + + } + if (mddev->external || !mddev->persistent) { + clear_bit(MD_CHANGE_DEVS, &mddev->flags); + clear_bit(MD_CHANGE_CLEAN, &mddev->flags); + wake_up(&mddev->sb_wait); + return; + } + spin_lock_irq(&mddev->write_lock); + mddev->utime = get_seconds(); + set_bit(MD_CHANGE_PENDING, &mddev->flags); if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags)) force_change = 1; @@ -2221,19 +2226,6 @@ repeat: MD_BUG(); mddev->events --; } - - /* - * do not write anything to disk if using - * nonpersistent superblocks - */ - if (!mddev->persistent) { - if (!mddev->external) - clear_bit(MD_CHANGE_PENDING, &mddev->flags); - - spin_unlock_irq(&mddev->write_lock); - wake_up(&mddev->sb_wait); - return; - } sync_sbs(mddev, nospares); spin_unlock_irq(&mddev->write_lock); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 73cc74f..ad83a4d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -787,8 +787,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) struct bio_list bl; struct page **behind_pages = NULL; const int rw = bio_data_dir(bio); - const bool do_sync = (bio->bi_rw & REQ_SYNC); - bool do_barriers; + const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); + unsigned long do_barriers; mdk_rdev_t *blocked_rdev; /* @@ -1120,6 +1120,8 @@ static int raid1_spare_active(mddev_t *mddev) { int i; conf_t *conf = mddev->private; + int count = 0; + unsigned long flags; /* * Find all failed disks within the RAID1 configuration @@ -1131,15 +1133,16 @@ static int raid1_spare_active(mddev_t *mddev) if (rdev && !test_bit(Faulty, &rdev->flags) && !test_and_set_bit(In_sync, &rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded--; - spin_unlock_irqrestore(&conf->device_lock, flags); + count++; + sysfs_notify_dirent(rdev->sysfs_state); } } + spin_lock_irqsave(&conf->device_lock, flags); + mddev->degraded -= count; + spin_unlock_irqrestore(&conf->device_lock, flags); print_conf(conf); - return 0; + return count; } @@ -1640,7 +1643,7 @@ static void raid1d(mddev_t *mddev) * We already have a nr_pending reference on these rdevs. */ int i; - const bool do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC); + const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC); clear_bit(R1BIO_BarrierRetry, &r1_bio->state); clear_bit(R1BIO_Barrier, &r1_bio->state); for (i=0; i < conf->raid_disks; i++) @@ -1696,7 +1699,7 @@ static void raid1d(mddev_t *mddev) (unsigned long long)r1_bio->sector); raid_end_bio_io(r1_bio); } else { - const bool do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC; + const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC; r1_bio->bios[r1_bio->read_disk] = mddev->ro ? IO_BLOCKED : NULL; r1_bio->read_disk = disk; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index a88aeb5..8471838 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -799,7 +799,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) int i; int chunk_sects = conf->chunk_mask + 1; const int rw = bio_data_dir(bio); - const bool do_sync = (bio->bi_rw & REQ_SYNC); + const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); struct bio_list bl; unsigned long flags; mdk_rdev_t *blocked_rdev; @@ -1116,6 +1116,8 @@ static int raid10_spare_active(mddev_t *mddev) int i; conf_t *conf = mddev->private; mirror_info_t *tmp; + int count = 0; + unsigned long flags; /* * Find all non-in_sync disks within the RAID10 configuration @@ -1126,15 +1128,16 @@ static int raid10_spare_active(mddev_t *mddev) if (tmp->rdev && !test_bit(Faulty, &tmp->rdev->flags) && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded--; - spin_unlock_irqrestore(&conf->device_lock, flags); + count++; + sysfs_notify_dirent(tmp->rdev->sysfs_state); } } + spin_lock_irqsave(&conf->device_lock, flags); + mddev->degraded -= count; + spin_unlock_irqrestore(&conf->device_lock, flags); print_conf(conf); - return 0; + return count; } @@ -1734,7 +1737,7 @@ static void raid10d(mddev_t *mddev) raid_end_bio_io(r10_bio); bio_put(bio); } else { - const bool do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); + const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); bio_put(bio); rdev = conf->mirrors[mirror].rdev; if (printk_ratelimit()) diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 866d4b5..69b0a16 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5330,6 +5330,8 @@ static int raid5_spare_active(mddev_t *mddev) int i; raid5_conf_t *conf = mddev->private; struct disk_info *tmp; + int count = 0; + unsigned long flags; for (i = 0; i < conf->raid_disks; i++) { tmp = conf->disks + i; @@ -5337,14 +5339,15 @@ static int raid5_spare_active(mddev_t *mddev) && tmp->rdev->recovery_offset == MaxSector && !test_bit(Faulty, &tmp->rdev->flags) && !test_and_set_bit(In_sync, &tmp->rdev->flags)) { - unsigned long flags; - spin_lock_irqsave(&conf->device_lock, flags); - mddev->degraded--; - spin_unlock_irqrestore(&conf->device_lock, flags); + count++; + sysfs_notify_dirent(tmp->rdev->sysfs_state); } } + spin_lock_irqsave(&conf->device_lock, flags); + mddev->degraded -= count; + spin_unlock_irqrestore(&conf->device_lock, flags); print_raid5_conf(conf); - return 0; + return count; } static int raid5_remove_disk(mddev_t *mddev, int number) diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 0efe631..d80cfdc 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -86,7 +86,9 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) init_waitqueue_head(&host->wq); INIT_DELAYED_WORK(&host->detect, mmc_rescan); INIT_DELAYED_WORK_DEFERRABLE(&host->disable, mmc_host_deeper_disable); +#ifdef CONFIG_PM host->pm_notify.notifier_call = mmc_pm_notify; +#endif /* * By default, hosts do not support SGIO or large requests. diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index 00af55d..fe63f6b 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 0d5de25..373dcfe 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -476,6 +477,26 @@ ath5k_pci_probe(struct pci_dev *pdev, int ret; u8 csz; + /* + * L0s needs to be disabled on all ath5k cards. + * + * For distributions shipping with CONFIG_PCIEASPM (this will be enabled + * by default in the future in 2.6.36) this will also mean both L1 and + * L0s will be disabled when a pre 1.1 PCIe device is detected. We do + * know L1 works correctly even for all ath5k pre 1.1 PCIe devices + * though but cannot currently undue the effect of a blacklist, for + * details you can read pcie_aspm_sanity_check() and see how it adjusts + * the device link capability. + * + * It may be possible in the future to implement some PCI API to allow + * drivers to override blacklists for pre 1.1 PCIe but for now it is + * best to accept that both L0s and L1 will be disabled completely for + * distributions shipping with CONFIG_PCIEASPM rather than having this + * issue present. Motivation for adding this new API will be to help + * with power consumption for some of these devices. + */ + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S); + ret = pci_enable_device(pdev); if (ret) { dev_err(&pdev->dev, "can't enable device\n"); diff --git a/drivers/net/wireless/ath/ath9k/eeprom.h b/drivers/net/wireless/ath/ath9k/eeprom.h index 8750c55..7f48df1 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom.h +++ b/drivers/net/wireless/ath/ath9k/eeprom.h @@ -191,6 +191,7 @@ #define AR9287_EEP_NO_BACK_VER AR9287_EEP_MINOR_VER_1 #define AR9287_EEP_START_LOC 128 +#define AR9287_HTC_EEP_START_LOC 256 #define AR9287_NUM_2G_CAL_PIERS 3 #define AR9287_NUM_2G_CCK_TARGET_POWERS 3 #define AR9287_NUM_2G_20_TARGET_POWERS 3 diff --git a/drivers/net/wireless/ath/ath9k/eeprom_9287.c b/drivers/net/wireless/ath/ath9k/eeprom_9287.c index 4a52cf0..dff2da7 100644 --- a/drivers/net/wireless/ath/ath9k/eeprom_9287.c +++ b/drivers/net/wireless/ath/ath9k/eeprom_9287.c @@ -34,9 +34,14 @@ static bool ath9k_hw_ar9287_fill_eeprom(struct ath_hw *ah) struct ar9287_eeprom *eep = &ah->eeprom.map9287; struct ath_common *common = ath9k_hw_common(ah); u16 *eep_data; - int addr, eep_start_loc = AR9287_EEP_START_LOC; + int addr, eep_start_loc; eep_data = (u16 *)eep; + if (ah->hw_version.devid == 0x7015) + eep_start_loc = AR9287_HTC_EEP_START_LOC; + else + eep_start_loc = AR9287_EEP_START_LOC; + if (!ath9k_hw_use_flash(ah)) { ath_print(common, ATH_DBG_EEPROM, "Reading from EEPROM, not flash\n"); diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index 61c1bee..17e7a9a 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -799,7 +799,7 @@ static int ath9k_hif_usb_download_fw(struct hif_device_usb *hif_dev) } kfree(buf); - if (hif_dev->device_id == 0x7010) + if ((hif_dev->device_id == 0x7010) || (hif_dev->device_id == 0x7015)) firm_offset = AR7010_FIRMWARE_TEXT; else firm_offset = AR9271_FIRMWARE_TEXT; @@ -901,6 +901,7 @@ static int ath9k_hif_usb_probe(struct usb_interface *interface, switch(hif_dev->device_id) { case 0x7010: + case 0x7015: case 0x9018: if (le16_to_cpu(udev->descriptor.bcdDevice) == 0x0202) hif_dev->fw_name = FIRMWARE_AR7010_1_1; @@ -912,11 +913,6 @@ static int ath9k_hif_usb_probe(struct usb_interface *interface, break; } - if (!hif_dev->fw_name) { - dev_err(&udev->dev, "Can't determine firmware !\n"); - goto err_htc_hw_alloc; - } - ret = ath9k_hif_usb_dev_init(hif_dev); if (ret) { ret = -EINVAL; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index 148b433..2d42791 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -245,6 +245,7 @@ static int ath9k_init_htc_services(struct ath9k_htc_priv *priv, u16 devid) switch(devid) { case 0x7010: + case 0x7015: case 0x9018: priv->htc->credits = 45; break; diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index ebed9d1..7d09b4b 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -366,7 +366,8 @@ static void ath9k_htc_setup_rate(struct ath9k_htc_priv *priv, caps = WLAN_RC_HT_FLAG; if (sta->ht_cap.mcs.rx_mask[1]) caps |= WLAN_RC_DS_FLAG; - if (sta->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) + if ((sta->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) && + (conf_is_ht40(&priv->hw->conf))) caps |= WLAN_RC_40_FLAG; if (conf_is_ht40(&priv->hw->conf) && (sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40)) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c index bd0b4ac..2a6e45a 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c @@ -78,18 +78,23 @@ int ath9k_htc_tx_start(struct ath9k_htc_priv *priv, struct sk_buff *skb) struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ieee80211_sta *sta = tx_info->control.sta; struct ath9k_htc_sta *ista; - struct ath9k_htc_vif *avp; struct ath9k_htc_tx_ctl tx_ctl; enum htc_endpoint_id epid; u16 qnum; __le16 fc; u8 *tx_fhdr; - u8 sta_idx; + u8 sta_idx, vif_idx; hdr = (struct ieee80211_hdr *) skb->data; fc = hdr->frame_control; - avp = (struct ath9k_htc_vif *) tx_info->control.vif->drv_priv; + if (tx_info->control.vif && + (struct ath9k_htc_vif *) tx_info->control.vif->drv_priv) + vif_idx = ((struct ath9k_htc_vif *) + tx_info->control.vif->drv_priv)->index; + else + vif_idx = priv->nvifs; + if (sta) { ista = (struct ath9k_htc_sta *) sta->drv_priv; sta_idx = ista->index; @@ -106,7 +111,7 @@ int ath9k_htc_tx_start(struct ath9k_htc_priv *priv, struct sk_buff *skb) memset(&tx_hdr, 0, sizeof(struct tx_frame_hdr)); tx_hdr.node_idx = sta_idx; - tx_hdr.vif_idx = avp->index; + tx_hdr.vif_idx = vif_idx; if (tx_info->flags & IEEE80211_TX_CTL_AMPDU) { tx_ctl.type = ATH9K_HTC_AMPDU; @@ -169,7 +174,7 @@ int ath9k_htc_tx_start(struct ath9k_htc_priv *priv, struct sk_buff *skb) tx_ctl.type = ATH9K_HTC_NORMAL; mgmt_hdr.node_idx = sta_idx; - mgmt_hdr.vif_idx = avp->index; + mgmt_hdr.vif_idx = vif_idx; mgmt_hdr.tidno = 0; mgmt_hdr.flags = 0; diff --git a/drivers/net/wireless/ath/ath9k/reg.h b/drivers/net/wireless/ath/ath9k/reg.h index 633e3d9..d01c4ad 100644 --- a/drivers/net/wireless/ath/ath9k/reg.h +++ b/drivers/net/wireless/ath/ath9k/reg.h @@ -899,6 +899,7 @@ #define AR_DEVID_7010(_ah) \ (((_ah)->hw_version.devid == 0x7010) || \ + ((_ah)->hw_version.devid == 0x7015) || \ ((_ah)->hw_version.devid == 0x9018)) #define AR_RADIO_SREV_MAJOR 0xf0 diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c index 16bbfa3..1189dbb 100644 --- a/drivers/net/wireless/ipw2x00/ipw2100.c +++ b/drivers/net/wireless/ipw2x00/ipw2100.c @@ -6665,12 +6665,13 @@ static int __init ipw2100_init(void) printk(KERN_INFO DRV_NAME ": %s, %s\n", DRV_DESCRIPTION, DRV_VERSION); printk(KERN_INFO DRV_NAME ": %s\n", DRV_COPYRIGHT); + pm_qos_add_request(&ipw2100_pm_qos_req, PM_QOS_CPU_DMA_LATENCY, + PM_QOS_DEFAULT_VALUE); + ret = pci_register_driver(&ipw2100_pci_driver); if (ret) goto out; - pm_qos_add_request(&ipw2100_pm_qos_req, PM_QOS_CPU_DMA_LATENCY, - PM_QOS_DEFAULT_VALUE); #ifdef CONFIG_IPW2100_DEBUG ipw2100_debug_level = debug; ret = driver_create_file(&ipw2100_pci_driver.driver, diff --git a/drivers/net/wireless/wl12xx/wl1251_cmd.c b/drivers/net/wireless/wl12xx/wl1251_cmd.c index a37b30c..ce3722f 100644 --- a/drivers/net/wireless/wl12xx/wl1251_cmd.c +++ b/drivers/net/wireless/wl12xx/wl1251_cmd.c @@ -484,7 +484,7 @@ int wl1251_cmd_trigger_scan_to(struct wl1251 *wl, u32 timeout) cmd->timeout = timeout; - ret = wl1251_cmd_send(wl, CMD_SCAN, cmd, sizeof(*cmd)); + ret = wl1251_cmd_send(wl, CMD_TRIGGER_SCAN_TO, cmd, sizeof(*cmd)); if (ret < 0) { wl1251_error("cmd trigger scan to failed: %d", ret); goto out; diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c index e058c2b..ca05aef 100644 --- a/drivers/platform/x86/asus_acpi.c +++ b/drivers/platform/x86/asus_acpi.c @@ -938,10 +938,11 @@ static int set_brightness(int value) /* SPLV laptop */ if (hotk->methods->brightness_set) { if (!write_acpi_int(hotk->handle, hotk->methods->brightness_set, - value, NULL)) + value, NULL)) { printk(KERN_WARNING "Asus ACPI: Error changing brightness\n"); ret = -EIO; + } goto out; } @@ -953,10 +954,11 @@ static int set_brightness(int value) hotk->methods->brightness_down, NULL, NULL); (value > 0) ? value-- : value++; - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { printk(KERN_WARNING "Asus ACPI: Error changing brightness\n"); ret = -EIO; + } } out: return ret; diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index d071ce0..097083c 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -841,6 +841,14 @@ static struct dmi_system_id __initdata compal_dmi_table[] = { .callback = dmi_check_cb }, { + .ident = "Dell Mini 1012", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"), + }, + .callback = dmi_check_cb + }, + { .ident = "Dell Inspiron 11z", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), @@ -1092,5 +1100,6 @@ MODULE_ALIAS("dmi:*:rnJHL90:rvrREFERENCE:*"); MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron910:*"); MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1010:*"); MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1011:*"); +MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1012:*"); MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1110:*"); MODULE_ALIAS("dmi:*:svnDellInc.:pnInspiron1210:*"); diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index b41ed5c..4413975 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -122,6 +122,13 @@ static struct dmi_system_id __devinitdata dell_blacklist[] = { }, }, { + .ident = "Dell Mini 1012", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"), + }, + }, + { .ident = "Dell Inspiron 11z", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c index afe82e5..9024480 100644 --- a/drivers/platform/x86/intel_ips.c +++ b/drivers/platform/x86/intel_ips.c @@ -1342,8 +1342,10 @@ static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips) limits = &ips_lv_limits; else if (strstr(boot_cpu_data.x86_model_id, "CPU U")) limits = &ips_ulv_limits; - else + else { dev_info(&ips->dev->dev, "No CPUID match found.\n"); + goto out; + } rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power); tdp = turbo_power & TURBO_TDP_MASK; @@ -1432,6 +1434,12 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id) spin_lock_init(&ips->turbo_status_lock); + ret = pci_enable_device(dev); + if (ret) { + dev_err(&dev->dev, "can't enable PCI device, aborting\n"); + goto error_free; + } + if (!pci_resource_start(dev, 0)) { dev_err(&dev->dev, "TBAR not assigned, aborting\n"); ret = -ENXIO; @@ -1444,11 +1452,6 @@ static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id) goto error_free; } - ret = pci_enable_device(dev); - if (ret) { - dev_err(&dev->dev, "can't enable PCI device, aborting\n"); - goto error_free; - } ips->regmap = ioremap(pci_resource_start(dev, 0), pci_resource_len(dev, 0)); diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 5d6119b..e35ed12 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1911,6 +1911,17 @@ enum { /* hot key scan codes (derived from ACPI DSDT) */ TP_ACPI_HOTKEYSCAN_VOLUMEDOWN, TP_ACPI_HOTKEYSCAN_MUTE, TP_ACPI_HOTKEYSCAN_THINKPAD, + TP_ACPI_HOTKEYSCAN_UNK1, + TP_ACPI_HOTKEYSCAN_UNK2, + TP_ACPI_HOTKEYSCAN_UNK3, + TP_ACPI_HOTKEYSCAN_UNK4, + TP_ACPI_HOTKEYSCAN_UNK5, + TP_ACPI_HOTKEYSCAN_UNK6, + TP_ACPI_HOTKEYSCAN_UNK7, + TP_ACPI_HOTKEYSCAN_UNK8, + + /* Hotkey keymap size */ + TPACPI_HOTKEY_MAP_LEN }; enum { /* Keys/events available through NVRAM polling */ @@ -3082,6 +3093,8 @@ static const struct tpacpi_quirk tpacpi_hotkey_qtable[] __initconst = { TPACPI_Q_IBM('1', 'D', TPACPI_HK_Q_INIMASK), /* X22, X23, X24 */ }; +typedef u16 tpacpi_keymap_t[TPACPI_HOTKEY_MAP_LEN]; + static int __init hotkey_init(struct ibm_init_struct *iibm) { /* Requirements for changing the default keymaps: @@ -3113,9 +3126,17 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) * If the above is too much to ask, don't change the keymap. * Ask the thinkpad-acpi maintainer to do it, instead. */ - static u16 ibm_keycode_map[] __initdata = { + + enum keymap_index { + TPACPI_KEYMAP_IBM_GENERIC = 0, + TPACPI_KEYMAP_LENOVO_GENERIC, + }; + + static const tpacpi_keymap_t tpacpi_keymaps[] __initconst = { + /* Generic keymap for IBM ThinkPads */ + [TPACPI_KEYMAP_IBM_GENERIC] = { /* Scan Codes 0x00 to 0x0B: ACPI HKEY FN+F1..F12 */ - KEY_FN_F1, KEY_FN_F2, KEY_COFFEE, KEY_SLEEP, + KEY_FN_F1, KEY_BATTERY, KEY_COFFEE, KEY_SLEEP, KEY_WLAN, KEY_FN_F6, KEY_SWITCHVIDEOMODE, KEY_FN_F8, KEY_FN_F9, KEY_FN_F10, KEY_FN_F11, KEY_SUSPEND, @@ -3146,11 +3167,13 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) /* (assignments unknown, please report if found) */ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, - }; - static u16 lenovo_keycode_map[] __initdata = { + }, + + /* Generic keymap for Lenovo ThinkPads */ + [TPACPI_KEYMAP_LENOVO_GENERIC] = { /* Scan Codes 0x00 to 0x0B: ACPI HKEY FN+F1..F12 */ KEY_FN_F1, KEY_COFFEE, KEY_BATTERY, KEY_SLEEP, - KEY_WLAN, KEY_FN_F6, KEY_SWITCHVIDEOMODE, KEY_FN_F8, + KEY_WLAN, KEY_CAMERA, KEY_SWITCHVIDEOMODE, KEY_FN_F8, KEY_FN_F9, KEY_FN_F10, KEY_FN_F11, KEY_SUSPEND, /* Scan codes 0x0C to 0x1F: Other ACPI HKEY hot keys */ @@ -3189,11 +3212,25 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) /* (assignments unknown, please report if found) */ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, + }, + }; + + static const struct tpacpi_quirk tpacpi_keymap_qtable[] __initconst = { + /* Generic maps (fallback) */ + { + .vendor = PCI_VENDOR_ID_IBM, + .bios = TPACPI_MATCH_ANY, .ec = TPACPI_MATCH_ANY, + .quirks = TPACPI_KEYMAP_IBM_GENERIC, + }, + { + .vendor = PCI_VENDOR_ID_LENOVO, + .bios = TPACPI_MATCH_ANY, .ec = TPACPI_MATCH_ANY, + .quirks = TPACPI_KEYMAP_LENOVO_GENERIC, + }, }; -#define TPACPI_HOTKEY_MAP_LEN ARRAY_SIZE(ibm_keycode_map) -#define TPACPI_HOTKEY_MAP_SIZE sizeof(ibm_keycode_map) -#define TPACPI_HOTKEY_MAP_TYPESIZE sizeof(ibm_keycode_map[0]) +#define TPACPI_HOTKEY_MAP_SIZE sizeof(tpacpi_keymap_t) +#define TPACPI_HOTKEY_MAP_TYPESIZE sizeof(tpacpi_keymap_t[0]) int res, i; int status; @@ -3202,6 +3239,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) bool tabletsw_state = false; unsigned long quirks; + unsigned long keymap_id; vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY, "initializing hotkey subdriver\n"); @@ -3342,7 +3380,6 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) goto err_exit; /* Set up key map */ - hotkey_keycode_map = kmalloc(TPACPI_HOTKEY_MAP_SIZE, GFP_KERNEL); if (!hotkey_keycode_map) { @@ -3352,17 +3389,14 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) goto err_exit; } - if (tpacpi_is_lenovo()) { - dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY, - "using Lenovo default hot key map\n"); - memcpy(hotkey_keycode_map, &lenovo_keycode_map, - TPACPI_HOTKEY_MAP_SIZE); - } else { - dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY, - "using IBM default hot key map\n"); - memcpy(hotkey_keycode_map, &ibm_keycode_map, - TPACPI_HOTKEY_MAP_SIZE); - } + keymap_id = tpacpi_check_quirks(tpacpi_keymap_qtable, + ARRAY_SIZE(tpacpi_keymap_qtable)); + BUG_ON(keymap_id >= ARRAY_SIZE(tpacpi_keymaps)); + dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY, + "using keymap number %lu\n", keymap_id); + + memcpy(hotkey_keycode_map, &tpacpi_keymaps[keymap_id], + TPACPI_HOTKEY_MAP_SIZE); input_set_capability(tpacpi_inputdev, EV_MSC, MSC_SCAN); tpacpi_inputdev->keycodesize = TPACPI_HOTKEY_MAP_TYPESIZE; @@ -3469,7 +3503,8 @@ static bool hotkey_notify_hotkey(const u32 hkey, *send_acpi_ev = true; *ignore_acpi_ev = false; - if (scancode > 0 && scancode < 0x21) { + /* HKEY event 0x1001 is scancode 0x00 */ + if (scancode > 0 && scancode <= TPACPI_HOTKEY_MAP_LEN) { scancode--; if (!(hotkey_source_mask & (1 << scancode))) { tpacpi_input_send_key_masked(scancode); @@ -6080,13 +6115,18 @@ static struct backlight_ops ibm_backlight_data = { /* --------------------------------------------------------------------- */ +/* + * Call _BCL method of video device. On some ThinkPads this will + * switch the firmware to the ACPI brightness control mode. + */ + static int __init tpacpi_query_bcl_levels(acpi_handle handle) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; union acpi_object *obj; int rc; - if (ACPI_SUCCESS(acpi_evaluate_object(handle, NULL, NULL, &buffer))) { + if (ACPI_SUCCESS(acpi_evaluate_object(handle, "_BCL", NULL, &buffer))) { obj = (union acpi_object *)buffer.pointer; if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) { printk(TPACPI_ERR "Unknown _BCL data, " @@ -6103,55 +6143,22 @@ static int __init tpacpi_query_bcl_levels(acpi_handle handle) return rc; } -static acpi_status __init tpacpi_acpi_walk_find_bcl(acpi_handle handle, - u32 lvl, void *context, void **rv) -{ - char name[ACPI_PATH_SEGMENT_LENGTH]; - struct acpi_buffer buffer = { sizeof(name), &name }; - - if (ACPI_SUCCESS(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)) && - !strncmp("_BCL", name, sizeof(name) - 1)) { - BUG_ON(!rv || !*rv); - **(int **)rv = tpacpi_query_bcl_levels(handle); - return AE_CTRL_TERMINATE; - } else { - return AE_OK; - } -} /* * Returns 0 (no ACPI _BCL or _BCL invalid), or size of brightness map */ static unsigned int __init tpacpi_check_std_acpi_brightness_support(void) { - int status; + acpi_handle video_device; int bcl_levels = 0; - void *bcl_ptr = &bcl_levels; - - if (!vid_handle) - TPACPI_ACPIHANDLE_INIT(vid); - - if (!vid_handle) - return 0; - - /* - * Search for a _BCL method, and execute it. This is safe on all - * ThinkPads, and as a side-effect, _BCL will place a Lenovo Vista - * BIOS in ACPI backlight control mode. We do NOT have to care - * about calling the _BCL method in an enabled video device, any - * will do for our purposes. - */ - status = acpi_walk_namespace(ACPI_TYPE_METHOD, vid_handle, 3, - tpacpi_acpi_walk_find_bcl, NULL, NULL, - &bcl_ptr); + tpacpi_acpi_handle_locate("video", ACPI_VIDEO_HID, &video_device); + if (video_device) + bcl_levels = tpacpi_query_bcl_levels(video_device); - if (ACPI_SUCCESS(status) && bcl_levels > 2) { - tp_features.bright_acpimode = 1; - return bcl_levels - 2; - } + tp_features.bright_acpimode = (bcl_levels > 0); - return 0; + return (bcl_levels > 2) ? (bcl_levels - 2) : 0; } /* @@ -6244,28 +6251,6 @@ static int __init brightness_init(struct ibm_init_struct *iibm) if (tp_features.bright_unkfw) return 1; - if (tp_features.bright_acpimode) { - if (acpi_video_backlight_support()) { - if (brightness_enable > 1) { - printk(TPACPI_NOTICE - "Standard ACPI backlight interface " - "available, not loading native one.\n"); - return 1; - } else if (brightness_enable == 1) { - printk(TPACPI_NOTICE - "Backlight control force enabled, even if standard " - "ACPI backlight interface is available\n"); - } - } else { - if (brightness_enable > 1) { - printk(TPACPI_NOTICE - "Standard ACPI backlight interface not " - "available, thinkpad_acpi native " - "brightness control enabled\n"); - } - } - } - if (!brightness_enable) { dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_BRGHT, "brightness support disabled by " @@ -6273,6 +6258,26 @@ static int __init brightness_init(struct ibm_init_struct *iibm) return 1; } + if (acpi_video_backlight_support()) { + if (brightness_enable > 1) { + printk(TPACPI_INFO + "Standard ACPI backlight interface " + "available, not loading native one.\n"); + return 1; + } else if (brightness_enable == 1) { + printk(TPACPI_WARN + "Cannot enable backlight brightness support, " + "ACPI is already handling it. Refer to the " + "acpi_backlight kernel parameter\n"); + return 1; + } + } else if (tp_features.bright_acpimode && brightness_enable > 1) { + printk(TPACPI_NOTICE + "Standard ACPI backlight interface not " + "available, thinkpad_acpi native " + "brightness control enabled\n"); + } + /* * Check for module parameter bogosity, note that we * init brightness_mode to TPACPI_BRGHT_MODE_MAX in order to be diff --git a/drivers/scsi/arcmsr/arcmsr_hba.c b/drivers/scsi/arcmsr/arcmsr_hba.c index 95a895d..c8dc392 100644 --- a/drivers/scsi/arcmsr/arcmsr_hba.c +++ b/drivers/scsi/arcmsr/arcmsr_hba.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/serial/of_serial.c b/drivers/serial/of_serial.c index 659a695..2af8fd1 100644 --- a/drivers/serial/of_serial.c +++ b/drivers/serial/of_serial.c @@ -14,11 +14,10 @@ #include #include #include +#include #include #include -#include - struct of_serial_info { int type; int line; diff --git a/drivers/serial/suncore.c b/drivers/serial/suncore.c index 544f2e2..6381a02 100644 --- a/drivers/serial/suncore.c +++ b/drivers/serial/suncore.c @@ -55,7 +55,12 @@ EXPORT_SYMBOL(sunserial_unregister_minors); int sunserial_console_match(struct console *con, struct device_node *dp, struct uart_driver *drv, int line, bool ignore_line) { - if (!con || of_console_device != dp) + if (!con) + return 0; + + drv->cons = con; + + if (of_console_device != dp) return 0; if (!ignore_line) { @@ -69,12 +74,10 @@ int sunserial_console_match(struct console *con, struct device_node *dp, return 0; } - con->index = line; - drv->cons = con; - - if (!console_set_on_cmdline) + if (!console_set_on_cmdline) { + con->index = line; add_preferred_console(con->name, line, NULL); - + } return 1; } EXPORT_SYMBOL(sunserial_console_match); diff --git a/drivers/spi/coldfire_qspi.c b/drivers/spi/coldfire_qspi.c index 59be3ef..052b3c7 100644 --- a/drivers/spi/coldfire_qspi.c +++ b/drivers/spi/coldfire_qspi.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/pohmelfs/path_entry.c b/drivers/staging/pohmelfs/path_entry.c index cdc4dd5..8ec83d2 100644 --- a/drivers/staging/pohmelfs/path_entry.c +++ b/drivers/staging/pohmelfs/path_entry.c @@ -44,9 +44,9 @@ int pohmelfs_construct_path_string(struct pohmelfs_inode *pi, void *data, int le return -ENOENT; } - read_lock(¤t->fs->lock); + spin_lock(¤t->fs->lock); path.mnt = mntget(current->fs->root.mnt); - read_unlock(¤t->fs->lock); + spin_unlock(¤t->fs->lock); path.dentry = d; @@ -91,9 +91,9 @@ int pohmelfs_path_length(struct pohmelfs_inode *pi) return -ENOENT; } - read_lock(¤t->fs->lock); + spin_lock(¤t->fs->lock); root = dget(current->fs->root.dentry); - read_unlock(¤t->fs->lock); + spin_unlock(¤t->fs->lock); spin_lock(&dcache_lock); diff --git a/drivers/video/amba-clcd.c b/drivers/video/amba-clcd.c index afe21e6..1c2c683 100644 --- a/drivers/video/amba-clcd.c +++ b/drivers/video/amba-clcd.c @@ -80,7 +80,10 @@ static void clcdfb_disable(struct clcd_fb *fb) /* * Disable CLCD clock source. */ - clk_disable(fb->clk); + if (fb->clk_enabled) { + fb->clk_enabled = false; + clk_disable(fb->clk); + } } static void clcdfb_enable(struct clcd_fb *fb, u32 cntl) @@ -88,7 +91,10 @@ static void clcdfb_enable(struct clcd_fb *fb, u32 cntl) /* * Enable the CLCD clock source. */ - clk_enable(fb->clk); + if (!fb->clk_enabled) { + fb->clk_enabled = true; + clk_enable(fb->clk); + } /* * Bring up by first enabling.. diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9e60fd2..a7528b9 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -108,7 +108,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) Node *fmt; struct file * interp_file = NULL; char iname[BINPRM_BUF_SIZE]; - char *iname_addr = iname; + const char *iname_addr = iname; int retval; int fd_binary = -1; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index aca9d55..396a988 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -16,7 +16,8 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) { - char *cp, *i_name, *i_arg; + const char *i_arg, *i_name; + char *cp; struct file *file; char interp[BINPRM_BUF_SIZE]; int retval; diff --git a/fs/buffer.c b/fs/buffer.c index 50efa33..3e7dca2 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) spin_unlock(lock); /* * Ensure any pending I/O completes so that - * ll_rw_block() actually writes the current - * contents - it is a noop if I/O is still in - * flight on potentially older contents. + * write_dirty_buffer() actually writes the + * current contents - it is a noop if I/O is + * still in flight on potentially older + * contents. */ - ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, WRITE_SYNC_PLUG); /* * Kick off IO for the previous mapping. Note @@ -2912,13 +2913,6 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(buffer_unwritten(bh)); /* - * Mask in barrier bit for a write (could be either a WRITE or a - * WRITE_SYNC - */ - if (buffer_ordered(bh) && (rw & WRITE)) - rw |= WRITE_BARRIER; - - /* * Only clear out a write error when rewriting */ if (test_set_buffer_req(bh) && (rw & WRITE)) @@ -2956,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) + * @rw: whether to %READ or %WRITE or maybe %READA (readahead) * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * * ll_rw_block() takes an array of pointers to &struct buffer_heads, and * requests an I/O operation on them, either a %READ or a %WRITE. The third - * %SWRITE is like %WRITE only we make sure that the *current* data in buffers - * are sent to disk. The fourth %READA option is described in the documentation - * for generic_make_request() which ll_rw_block() calls. + * %READA option is described in the documentation for generic_make_request() + * which ll_rw_block() calls. * * This function drops any buffer that it cannot get a lock on (with the - * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be - * clean when doing a write request, and any buffer that appears to be - * up-to-date when doing read request. Further it marks as clean buffers that - * are processed for writing (the buffer cache won't assume that they are - * actually clean until the buffer gets unlocked). + * BH_Lock state bit), any buffer that appears to be clean when doing a write + * request, and any buffer that appears to be up-to-date when doing read + * request. Further it marks as clean buffers that are processed for + * writing (the buffer cache won't assume that they are actually clean + * until the buffer gets unlocked). * * ll_rw_block sets b_end_io to simple completion handler that marks * the buffer up-to-date (if approriate), unlocks the buffer and wakes @@ -2987,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) - lock_buffer(bh); - else if (!trylock_buffer(bh)) + if (!trylock_buffer(bh)) continue; - - if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || - rw == SWRITE_SYNC_PLUG) { + if (rw == WRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - if (rw == SWRITE_SYNC) - submit_bh(WRITE_SYNC, bh); - else - submit_bh(WRITE, bh); + submit_bh(WRITE, bh); continue; } } else { @@ -3016,12 +3002,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) } EXPORT_SYMBOL(ll_rw_block); +void write_dirty_buffer(struct buffer_head *bh, int rw) +{ + lock_buffer(bh); + if (!test_clear_buffer_dirty(bh)) { + unlock_buffer(bh); + return; + } + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(rw, bh); +} +EXPORT_SYMBOL(write_dirty_buffer); + /* * For a data-integrity writeout, we need to wait upon any in-progress I/O * and then start new I/O and then wait upon it. The caller must have a ref on * the buffer_head. */ -int sync_dirty_buffer(struct buffer_head *bh) +int __sync_dirty_buffer(struct buffer_head *bh, int rw) { int ret = 0; @@ -3030,7 +3029,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE_SYNC, bh); + ret = submit_bh(rw, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); @@ -3043,6 +3042,12 @@ int sync_dirty_buffer(struct buffer_head *bh) } return ret; } +EXPORT_SYMBOL(__sync_dirty_buffer); + +int sync_dirty_buffer(struct buffer_head *bh) +{ + return __sync_dirty_buffer(bh, WRITE_SYNC); +} EXPORT_SYMBOL(sync_dirty_buffer); /* diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index a53b130..1e7a330 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -80,7 +80,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, } } else { inode = iget_locked(sb, CRAMINO(cramfs_inode)); - if (inode) { + if (inode && (inode->i_state & I_NEW)) { setup_inode(inode, cramfs_inode); unlock_new_inode(inode); } diff --git a/fs/dcache.c b/fs/dcache.c index 4d13bf5..83293be 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1332,31 +1332,13 @@ EXPORT_SYMBOL(d_add_ci); * d_lookup - search for a dentry * @parent: parent dentry * @name: qstr of name we wish to find + * Returns: dentry, or NULL * - * Searches the children of the parent dentry for the name in question. If - * the dentry is found its reference count is incremented and the dentry - * is returned. The caller must use dput to free the entry when it has - * finished using it. %NULL is returned on failure. - * - * __d_lookup is dcache_lock free. The hash list is protected using RCU. - * Memory barriers are used while updating and doing lockless traversal. - * To avoid races with d_move while rename is happening, d_lock is used. - * - * Overflows in memcmp(), while d_move, are avoided by keeping the length - * and name pointer in one structure pointed by d_qstr. - * - * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while - * lookup is going on. - * - * The dentry unused LRU is not updated even if lookup finds the required dentry - * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, - * select_parent and __dget_locked. This laziness saves lookup from dcache_lock - * acquisition. - * - * d_lookup() is protected against the concurrent renames in some unrelated - * directory using the seqlockt_t rename_lock. + * d_lookup searches the children of the parent dentry for the name in + * question. If the dentry is found its reference count is incremented and the + * dentry is returned. The caller must use dput to free the entry when it has + * finished using it. %NULL is returned if the dentry does not exist. */ - struct dentry * d_lookup(struct dentry * parent, struct qstr * name) { struct dentry * dentry = NULL; @@ -1372,6 +1354,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) } EXPORT_SYMBOL(d_lookup); +/* + * __d_lookup - search for a dentry (racy) + * @parent: parent dentry + * @name: qstr of name we wish to find + * Returns: dentry, or NULL + * + * __d_lookup is like d_lookup, however it may (rarely) return a + * false-negative result due to unrelated rename activity. + * + * __d_lookup is slightly faster by avoiding rename_lock read seqlock, + * however it must be used carefully, eg. with a following d_lookup in + * the case of failure. + * + * __d_lookup callers must be commented. + */ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) { unsigned int len = name->len; @@ -1382,6 +1379,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) struct hlist_node *node; struct dentry *dentry; + /* + * The hash list is protected using RCU. + * + * Take d_lock when comparing a candidate dentry, to avoid races + * with d_move(). + * + * It is possible that concurrent renames can mess up our list + * walk here and result in missing our dentry, resulting in the + * false-negative result. d_lookup() protects against concurrent + * renames using rename_lock seqlock. + * + * See Documentation/vfs/dcache-locking.txt for more details. + */ rcu_read_lock(); hlist_for_each_entry_rcu(dentry, node, head, d_hash) { @@ -1396,8 +1406,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) /* * Recheck the dentry after taking the lock - d_move may have - * changed things. Don't bother checking the hash because we're - * about to compare the whole name anyway. + * changed things. Don't bother checking the hash because + * we're about to compare the whole name anyway. */ if (dentry->d_parent != parent) goto next; @@ -1925,7 +1935,7 @@ static int prepend_path(const struct path *path, struct path *root, bool slash = false; int error = 0; - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -1954,7 +1964,7 @@ out: if (!error && !slash) error = prepend(buffer, buflen, "/", 1); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return error; global_root: @@ -2292,11 +2302,12 @@ int path_is_under(struct path *path1, struct path *path2) struct vfsmount *mnt = path1->mnt; struct dentry *dentry = path1->dentry; int res; - spin_lock(&vfsmount_lock); + + br_read_lock(vfsmount_lock); if (mnt != path2->mnt) { for (;;) { if (mnt->mnt_parent == mnt) { - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return 0; } if (mnt->mnt_parent == path2->mnt) @@ -2306,7 +2317,7 @@ int path_is_under(struct path *path1, struct path *path2) dentry = mnt->mnt_mountpoint; } res = is_subdir(dentry, path2->dentry); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return res; } EXPORT_SYMBOL(path_is_under); diff --git a/fs/exec.c b/fs/exec.c index 7761837..2d94552 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -361,13 +361,13 @@ err: /* * count() counts the number of strings in array ARGV. */ -static int count(char __user * __user * argv, int max) +static int count(const char __user * const __user * argv, int max) { int i = 0; if (argv != NULL) { for (;;) { - char __user * p; + const char __user * p; if (get_user(p, argv)) return -EFAULT; @@ -387,7 +387,7 @@ static int count(char __user * __user * argv, int max) * processes's memory to the new process's stack. The call to get_user_pages() * ensures the destination page is created and not swapped out. */ -static int copy_strings(int argc, char __user * __user * argv, +static int copy_strings(int argc, const char __user *const __user *argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; @@ -396,7 +396,7 @@ static int copy_strings(int argc, char __user * __user * argv, int ret; while (argc-- > 0) { - char __user *str; + const char __user *str; int len; unsigned long pos; @@ -470,12 +470,13 @@ out: /* * Like copy_strings, but get argv and its values from kernel memory. */ -int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm) +int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm) { int r; mm_segment_t oldfs = get_fs(); set_fs(KERNEL_DS); - r = copy_strings(argc, (char __user * __user *)argv, bprm); + r = copy_strings(argc, (const char __user *const __user *)argv, bprm); set_fs(oldfs); return r; } @@ -997,7 +998,7 @@ EXPORT_SYMBOL(flush_old_exec); void setup_new_exec(struct linux_binprm * bprm) { int i, ch; - char * name; + const char *name; char tcomm[sizeof(current->comm)]; arch_pick_mmap_layout(current->mm); @@ -1117,7 +1118,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) bprm->unsafe = tracehook_unsafe_exec(p); n_fs = 1; - write_lock(&p->fs->lock); + spin_lock(&p->fs->lock); rcu_read_lock(); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) @@ -1134,7 +1135,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) res = 1; } } - write_unlock(&p->fs->lock); + spin_unlock(&p->fs->lock); return res; } @@ -1316,9 +1317,9 @@ EXPORT_SYMBOL(search_binary_handler); /* * sys_execve() executes a new program. */ -int do_execve(char * filename, - char __user *__user *argv, - char __user *__user *envp, +int do_execve(const char * filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs * regs) { struct linux_binprm *bprm; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1fa23f6..1736f23 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { int i, err = 0; - ll_rw_block(SWRITE, nr_bhs, bhs); + for (i = 0; i < nr_bhs; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); if (buffer_eopnotsupp(bhs[i])) { diff --git a/fs/file_table.c b/fs/file_table.c index edecd36..a04bdd8 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #include @@ -32,8 +34,8 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -/* public. Not pretty! */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +DECLARE_LGLOCK(files_lglock); +DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -249,7 +251,7 @@ static void __fput(struct file *file) cdev_put(inode->i_cdev); fops_put(file->f_op); put_pid(file->f_owner.pid); - file_kill(file); + file_sb_list_del(file); if (file->f_mode & FMODE_WRITE) drop_file_write_access(file); file->f_path.dentry = NULL; @@ -328,41 +330,107 @@ struct file *fget_light(unsigned int fd, int *fput_needed) return file; } - void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_kill(file); + file_sb_list_del(file); file_free(file); } } -void file_move(struct file *file, struct list_head *list) +static inline int file_list_cpu(struct file *file) { - if (!list) - return; - file_list_lock(); - list_move(&file->f_u.fu_list, list); - file_list_unlock(); +#ifdef CONFIG_SMP + return file->f_sb_list_cpu; +#else + return smp_processor_id(); +#endif +} + +/* helper for file_sb_list_add to reduce ifdefs */ +static inline void __file_sb_list_add(struct file *file, struct super_block *sb) +{ + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; + cpu = smp_processor_id(); + file->f_sb_list_cpu = cpu; + list = per_cpu_ptr(sb->s_files, cpu); +#else + list = &sb->s_files; +#endif + list_add(&file->f_u.fu_list, list); } -void file_kill(struct file *file) +/** + * file_sb_list_add - add a file to the sb's file list + * @file: file to add + * @sb: sb to add it to + * + * Use this function to associate a file with the superblock of the inode it + * refers to. + */ +void file_sb_list_add(struct file *file, struct super_block *sb) +{ + lg_local_lock(files_lglock); + __file_sb_list_add(file, sb); + lg_local_unlock(files_lglock); +} + +/** + * file_sb_list_del - remove a file from the sb's file list + * @file: file to remove + * @sb: sb to remove it from + * + * Use this function to remove a file from its superblock. + */ +void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - file_list_lock(); + lg_local_lock_cpu(files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - file_list_unlock(); + lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); } } +#ifdef CONFIG_SMP + +/* + * These macros iterate all files on all CPUs for a given superblock. + * files_lglock must be held globally. + */ +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + int i; \ + for_each_possible_cpu(i) { \ + struct list_head *list; \ + list = per_cpu_ptr((__sb)->s_files, i); \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ + } \ +} + +#else + +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + struct list_head *list; \ + list = &(sb)->s_files; \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ +} + +#endif + int fs_may_remount_ro(struct super_block *sb) { struct file *file; - /* Check that no files are currently opened for writing. */ - file_list_lock(); - list_for_each_entry(file, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, file) { struct inode *inode = file->f_path.dentry->d_inode; /* File with pending delete? */ @@ -372,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb) /* Writeable file? */ if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; - } - file_list_unlock(); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); return 1; /* Tis' cool bro. */ too_bad: - file_list_unlock(); + lg_global_unlock(files_lglock); return 0; } @@ -392,8 +460,8 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - file_list_lock(); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) continue; @@ -408,16 +476,13 @@ retry: continue; file_release_write(f); mnt = mntget(f->f_path.mnt); - file_list_unlock(); - /* - * This can sleep, so we can't hold - * the file_list_lock() spinlock. - */ + /* This can sleep, so we can't hold the spinlock. */ + lg_global_unlock(files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; - } - file_list_unlock(); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); } void __init files_init(unsigned long mempages) @@ -437,5 +502,6 @@ void __init files_init(unsigned long mempages) if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); + lg_lock_init(files_lglock); percpu_counter_init(&nr_files, 0); } diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 1ee40eb..ed45a9c 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) { struct path old_root; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_root = fs->root; fs->root = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_root.dentry) path_put(&old_root); } @@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) { struct path old_pwd; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_pwd = fs->pwd; fs->pwd = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_pwd.dentry) path_put(&old_pwd); @@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) task_lock(p); fs = p->fs; if (fs) { - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { path_get(new_root); @@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) fs->pwd = *new_root; count++; } - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } task_unlock(p); } while_each_thread(g, p); @@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk) if (fs) { int kill; task_lock(tsk); - write_lock(&fs->lock); + spin_lock(&fs->lock); tsk->fs = NULL; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(tsk); if (kill) free_fs_struct(fs); @@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) if (fs) { fs->users = 1; fs->in_exec = 0; - rwlock_init(&fs->lock); + spin_lock_init(&fs->lock); fs->umask = old->umask; get_fs_root_and_pwd(old, &fs->root, &fs->pwd); } @@ -121,10 +121,10 @@ int unshare_fs_struct(void) return -ENOMEM; task_lock(current); - write_lock(&fs->lock); + spin_lock(&fs->lock); kill = !--fs->users; current->fs = new_fs; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) @@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, - .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), .umask = 0022, }; @@ -156,14 +156,14 @@ void daemonize_fs_struct(void) task_lock(current); - write_lock(&init_fs.lock); + spin_lock(&init_fs.lock); init_fs.users++; - write_unlock(&init_fs.lock); + spin_unlock(&init_fs.lock); - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = &init_fs; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 99800e5..6bc9e3a 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -94,6 +94,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, if (error < 0) goto failed; inode->i_mode = mode; + inode->i_ctime = CURRENT_TIME; if (error == 0) { posix_acl_release(acl); acl = NULL; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index dd1e555..f7dc9b5 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -104,7 +104,7 @@ static char *__dentry_name(struct dentry *dentry, char *name) __putname(name); return NULL; } - strncpy(name, root, PATH_MAX); + strlcpy(name, root, PATH_MAX); if (len > p - name) { __putname(name); return NULL; @@ -876,7 +876,7 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd) char *path = dentry_name(dentry); int err = -ENOMEM; if (path) { - int err = hostfs_do_readlink(path, link, PATH_MAX); + err = hostfs_do_readlink(path, link, PATH_MAX); if (err == PATH_MAX) err = -E2BIG; __putname(path); diff --git a/fs/internal.h b/fs/internal.h index 6b706bc..a6910e9 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -9,6 +9,8 @@ * 2 of the License, or (at your option) any later version. */ +#include + struct super_block; struct linux_binprm; struct path; @@ -70,7 +72,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern void __init mnt_init(void); -extern spinlock_t vfsmount_lock; +DECLARE_BRLOCK(vfsmount_lock); + /* * fs_struct.c @@ -80,6 +83,8 @@ extern void chroot_fs_refs(struct path *, struct path *); /* * file_table.c */ +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); extern void mark_files_ro(struct super_block *); extern struct file *get_empty_filp(void); diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index b0435dd..05a38b9 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 28a9dda..95d8c11 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal, struct buffer_head *bh; journal_header_t *header; int ret; - int barrier_done = 0; if (is_journal_aborted(journal)) return 0; @@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "write commit block"); set_buffer_dirty(bh); + if (journal->j_flags & JFS_BARRIER) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = sync_dirty_buffer(bh); - if (barrier_done) - clear_buffer_ordered(bh); - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - char b[BDEVNAME_SIZE]; + ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); - printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", - bdevname(journal->j_dev, b)); - spin_lock(&journal->j_state_lock); - journal->j_flags &= ~JFS_BARRIER; - spin_unlock(&journal->j_state_lock); + /* + * Is it possible for another commit to fail at roughly + * the same time as this one? If so, we don't want to + * trust the barrier flag in the super, but instead want + * to remember if we sent a barrier request + */ + if (ret == -EOPNOTSUPP) { + char b[BDEVNAME_SIZE]; - /* And try again, without the barrier */ - set_buffer_uptodate(bh); - set_buffer_dirty(bh); + printk(KERN_WARNING + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", + bdevname(journal->j_dev, b)); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JFS_BARRIER; + spin_unlock(&journal->j_state_lock); + + /* And try again, without the barrier */ + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + ret = sync_dirty_buffer(bh); + } + } else { ret = sync_dirty_buffer(bh); } + put_bh(bh); /* One for getblk() */ journal_put_journal_head(descriptor); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f19ce94..2c4b1f1 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait) if (wait) sync_dirty_buffer(bh); else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index ad71732..d290183 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1c23a0f..5247e7f 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = journal->j_chkpt_bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f52e5e8..7c068c1 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; int ret; - int barrier_done = 0; struct timespec now = current_kernel_time(); if (is_journal_aborted(journal)) @@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = submit_bh(WRITE_SYNC_PLUG, bh); - if (barrier_done) - clear_buffer_ordered(bh); - - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - printk(KERN_WARNING - "JBD2: Disabling barriers on %s, " - "not supported by device\n", journal->j_devname); - write_lock(&journal->j_state_lock); - journal->j_flags &= ~JBD2_BARRIER; - write_unlock(&journal->j_state_lock); + ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); + if (ret == -EOPNOTSUPP) { + printk(KERN_WARNING + "JBD2: Disabling barriers on %s, " + "not supported by device\n", journal->j_devname); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_BARRIER; + write_unlock(&journal->j_state_lock); - /* And try again, without the barrier */ - lock_buffer(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); + /* And try again, without the barrier */ + lock_buffer(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + ret = submit_bh(WRITE_SYNC_PLUG, bh); + } + } else { ret = submit_bh(WRITE_SYNC_PLUG, bh); } *cbh = bh; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ad5866a..0e8014e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) set_buffer_uptodate(bh); } } else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a360b06..9ad321f 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/mbcache.c b/fs/mbcache.c index cf4e6cd..9344474 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -80,6 +80,7 @@ struct mb_cache { struct list_head c_cache_list; const char *c_name; atomic_t c_entry_count; + int c_max_entries; int c_bucket_bits; struct kmem_cache *c_entry_cache; struct list_head *c_block_hash; @@ -243,6 +244,12 @@ mb_cache_create(const char *name, int bucket_bits) if (!cache->c_entry_cache) goto fail2; + /* + * Set an upper limit on the number of cache entries so that the hash + * chains won't grow too long. + */ + cache->c_max_entries = bucket_count << 4; + spin_lock(&mb_cache_spinlock); list_add(&cache->c_cache_list, &mb_cache_list); spin_unlock(&mb_cache_spinlock); @@ -333,7 +340,6 @@ mb_cache_destroy(struct mb_cache *cache) kfree(cache); } - /* * mb_cache_entry_alloc() * @@ -345,17 +351,29 @@ mb_cache_destroy(struct mb_cache *cache) struct mb_cache_entry * mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) { - struct mb_cache_entry *ce; - - ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); - if (ce) { + struct mb_cache_entry *ce = NULL; + + if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { + spin_lock(&mb_cache_spinlock); + if (!list_empty(&mb_cache_lru_list)) { + ce = list_entry(mb_cache_lru_list.next, + struct mb_cache_entry, e_lru_list); + list_del_init(&ce->e_lru_list); + __mb_cache_entry_unhash(ce); + } + spin_unlock(&mb_cache_spinlock); + } + if (!ce) { + ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); + if (!ce) + return NULL; atomic_inc(&cache->c_entry_count); INIT_LIST_HEAD(&ce->e_lru_list); INIT_LIST_HEAD(&ce->e_block_list); ce->e_cache = cache; - ce->e_used = 1 + MB_CACHE_WRITER; ce->e_queued = 0; } + ce->e_used = 1 + MB_CACHE_WRITER; return ce; } diff --git a/fs/namei.c b/fs/namei.c index 17ea76b..24896e8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -595,15 +595,16 @@ int follow_up(struct path *path) { struct vfsmount *parent; struct dentry *mountpoint; - spin_lock(&vfsmount_lock); + + br_read_lock(vfsmount_lock); parent = path->mnt->mnt_parent; if (parent == path->mnt) { - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return 0; } mntget(parent); mountpoint = dget(path->mnt->mnt_mountpoint); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -686,6 +687,35 @@ static __always_inline void follow_dotdot(struct nameidata *nd) } /* + * Allocate a dentry with name and parent, and perform a parent + * directory ->lookup on it. Returns the new dentry, or ERR_PTR + * on error. parent->d_inode->i_mutex must be held. d_lookup must + * have verified that no child exists while under i_mutex. + */ +static struct dentry *d_alloc_and_lookup(struct dentry *parent, + struct qstr *name, struct nameidata *nd) +{ + struct inode *inode = parent->d_inode; + struct dentry *dentry; + struct dentry *old; + + /* Don't create child dentry for a dead directory. */ + if (unlikely(IS_DEADDIR(inode))) + return ERR_PTR(-ENOENT); + + dentry = d_alloc(parent, name); + if (unlikely(!dentry)) + return ERR_PTR(-ENOMEM); + + old = inode->i_op->lookup(inode, dentry, nd); + if (unlikely(old)) { + dput(dentry); + dentry = old; + } + return dentry; +} + +/* * It's more convoluted than I'd like it to be, but... it's still fairly * small and for now I'd prefer to have fast path as straight as possible. * It _is_ time-critical. @@ -706,9 +736,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, return err; } + /* + * Rename seqlock is not required here because in the off chance + * of a false negative due to a concurrent rename, we're going to + * do the non-racy lookup, below. + */ dentry = __d_lookup(nd->path.dentry, name); if (!dentry) goto need_lookup; +found: if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; done: @@ -724,56 +760,28 @@ need_lookup: mutex_lock(&dir->i_mutex); /* * First re-do the cached lookup just in case it was created - * while we waited for the directory semaphore.. + * while we waited for the directory semaphore, or the first + * lookup failed due to an unrelated rename. * - * FIXME! This could use version numbering or similar to - * avoid unnecessary cache lookups. - * - * The "dcache_lock" is purely to protect the RCU list walker - * from concurrent renames at this point (we mustn't get false - * negatives from the RCU list walk here, unlike the optimistic - * fast walk). - * - * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup + * This could use version numbering or similar to avoid unnecessary + * cache lookups, but then we'd have to do the first lookup in the + * non-racy way. However in the common case here, everything should + * be hot in cache, so would it be a big win? */ dentry = d_lookup(parent, name); - if (!dentry) { - struct dentry *new; - - /* Don't create child dentry for a dead directory. */ - dentry = ERR_PTR(-ENOENT); - if (IS_DEADDIR(dir)) - goto out_unlock; - - new = d_alloc(parent, name); - dentry = ERR_PTR(-ENOMEM); - if (new) { - dentry = dir->i_op->lookup(dir, new, nd); - if (dentry) - dput(new); - else - dentry = new; - } -out_unlock: + if (likely(!dentry)) { + dentry = d_alloc_and_lookup(parent, name, nd); mutex_unlock(&dir->i_mutex); if (IS_ERR(dentry)) goto fail; goto done; } - /* * Uhhuh! Nasty case: the cache was re-populated while * we waited on the semaphore. Need to revalidate. */ mutex_unlock(&dir->i_mutex); - if (dentry->d_op && dentry->d_op->d_revalidate) { - dentry = do_revalidate(dentry, nd); - if (!dentry) - dentry = ERR_PTR(-ENOENT); - } - if (IS_ERR(dentry)) - goto fail; - goto done; + goto found; need_revalidate: dentry = do_revalidate(dentry, nd); @@ -1130,35 +1138,18 @@ static struct dentry *__lookup_hash(struct qstr *name, goto out; } - dentry = __d_lookup(base, name); - - /* lockess __d_lookup may fail due to concurrent d_move() - * in some unrelated directory, so try with d_lookup + /* + * Don't bother with __d_lookup: callers are for creat as + * well as unlink, so a lot of the time it would cost + * a double lookup. */ - if (!dentry) - dentry = d_lookup(base, name); + dentry = d_lookup(base, name); if (dentry && dentry->d_op && dentry->d_op->d_revalidate) dentry = do_revalidate(dentry, nd); - if (!dentry) { - struct dentry *new; - - /* Don't create child dentry for a dead directory. */ - dentry = ERR_PTR(-ENOENT); - if (IS_DEADDIR(inode)) - goto out; - - new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - dentry = inode->i_op->lookup(inode, new, nd); - if (!dentry) - dentry = new; - else - dput(new); - } + if (!dentry) + dentry = d_alloc_and_lookup(base, name, nd); out: return dentry; } diff --git a/fs/namespace.c b/fs/namespace.c index 2e10cb1..de402eb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include #include @@ -38,12 +40,10 @@ #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) #define HASH_SIZE (1UL << HASH_SHIFT) -/* spinlock for vfsmount related operations, inplace of dcache_lock */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); - static int event; static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); +static DEFINE_SPINLOCK(mnt_id_lock); static int mnt_id_start = 0; static int mnt_group_start = 1; @@ -55,6 +55,16 @@ static struct rw_semaphore namespace_sem; struct kobject *fs_kobj; EXPORT_SYMBOL_GPL(fs_kobj); +/* + * vfsmount lock may be taken for read to prevent changes to the + * vfsmount hash, ie. during mountpoint lookups or walking back + * up the tree. + * + * It should be taken for write in all cases where the vfsmount + * tree or hash is modified or when a vfsmount structure is modified. + */ +DEFINE_BRLOCK(vfsmount_lock); + static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); @@ -65,18 +75,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) -/* allocation is serialized by namespace_sem */ +/* + * allocation is serialized by namespace_sem, but we need the spinlock to + * serialize with freeing. + */ static int mnt_alloc_id(struct vfsmount *mnt) { int res; retry: ida_pre_get(&mnt_id_ida, GFP_KERNEL); - spin_lock(&vfsmount_lock); + spin_lock(&mnt_id_lock); res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); if (!res) mnt_id_start = mnt->mnt_id + 1; - spin_unlock(&vfsmount_lock); + spin_unlock(&mnt_id_lock); if (res == -EAGAIN) goto retry; @@ -86,11 +99,11 @@ retry: static void mnt_free_id(struct vfsmount *mnt) { int id = mnt->mnt_id; - spin_lock(&vfsmount_lock); + spin_lock(&mnt_id_lock); ida_remove(&mnt_id_ida, id); if (mnt_id_start > id) mnt_id_start = id; - spin_unlock(&vfsmount_lock); + spin_unlock(&mnt_id_lock); } /* @@ -348,7 +361,7 @@ static int mnt_make_readonly(struct vfsmount *mnt) { int ret = 0; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. This store @@ -382,15 +395,15 @@ static int mnt_make_readonly(struct vfsmount *mnt) */ smp_wmb(); mnt->mnt_flags &= ~MNT_WRITE_HOLD; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt_flags &= ~MNT_READONLY; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) @@ -414,6 +427,7 @@ void free_vfsmnt(struct vfsmount *mnt) /* * find the first or last mount at @dentry on vfsmount @mnt depending on * @dir. If @dir is set return the first mount else return the last mount. + * vfsmount_lock must be held for read or write. */ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, int dir) @@ -443,10 +457,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *lookup_mnt(struct path *path) { struct vfsmount *child_mnt; - spin_lock(&vfsmount_lock); + + br_read_lock(vfsmount_lock); if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) mntget(child_mnt); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return child_mnt; } @@ -455,6 +470,9 @@ static inline int check_mnt(struct vfsmount *mnt) return mnt->mnt_ns == current->nsproxy->mnt_ns; } +/* + * vfsmount lock must be held for write + */ static void touch_mnt_namespace(struct mnt_namespace *ns) { if (ns) { @@ -463,6 +481,9 @@ static void touch_mnt_namespace(struct mnt_namespace *ns) } } +/* + * vfsmount lock must be held for write + */ static void __touch_mnt_namespace(struct mnt_namespace *ns) { if (ns && ns->event != event) { @@ -471,6 +492,9 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) } } +/* + * vfsmount lock must be held for write + */ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) { old_path->dentry = mnt->mnt_mountpoint; @@ -482,6 +506,9 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) old_path->dentry->d_mounted--; } +/* + * vfsmount lock must be held for write + */ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *child_mnt) { @@ -490,6 +517,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, dentry->d_mounted++; } +/* + * vfsmount lock must be held for write + */ static void attach_mnt(struct vfsmount *mnt, struct path *path) { mnt_set_mountpoint(path->mnt, path->dentry, mnt); @@ -499,7 +529,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path) } /* - * the caller must hold vfsmount_lock + * vfsmount lock must be held for write */ static void commit_tree(struct vfsmount *mnt) { @@ -623,39 +653,43 @@ static inline void __mntput(struct vfsmount *mnt) void mntput_no_expire(struct vfsmount *mnt) { repeat: - if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { - if (likely(!mnt->mnt_pinned)) { - spin_unlock(&vfsmount_lock); - __mntput(mnt); - return; - } - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); - mnt->mnt_pinned = 0; - spin_unlock(&vfsmount_lock); - acct_auto_close_mnt(mnt); - goto repeat; + if (atomic_add_unless(&mnt->mnt_count, -1, 1)) + return; + br_write_lock(vfsmount_lock); + if (!atomic_dec_and_test(&mnt->mnt_count)) { + br_write_unlock(vfsmount_lock); + return; + } + if (likely(!mnt->mnt_pinned)) { + br_write_unlock(vfsmount_lock); + __mntput(mnt); + return; } + atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); + mnt->mnt_pinned = 0; + br_write_unlock(vfsmount_lock); + acct_auto_close_mnt(mnt); + goto repeat; } - EXPORT_SYMBOL(mntput_no_expire); void mnt_pin(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt_pinned++; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (mnt->mnt_pinned) { atomic_inc(&mnt->mnt_count); mnt->mnt_pinned--; } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } EXPORT_SYMBOL(mnt_unpin); @@ -746,12 +780,12 @@ int mnt_had_events(struct proc_mounts *p) struct mnt_namespace *ns = p->ns; int res = 0; - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); if (p->event != ns->event) { p->event = ns->event; res = 1; } - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return res; } @@ -952,12 +986,12 @@ int may_umount_tree(struct vfsmount *mnt) int minimum_refs = 0; struct vfsmount *p; - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += atomic_read(&p->mnt_count); minimum_refs += 2; } - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -984,10 +1018,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); if (propagate_mount_busy(mnt, 2)) ret = 0; - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1003,13 +1037,14 @@ void release_mounts(struct list_head *head) if (mnt->mnt_parent != mnt) { struct dentry *dentry; struct vfsmount *m; - spin_lock(&vfsmount_lock); + + br_write_lock(vfsmount_lock); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); dput(dentry); mntput(m); } @@ -1017,6 +1052,10 @@ void release_mounts(struct list_head *head) } } +/* + * vfsmount lock must be held for write + * namespace_sem must be held for write + */ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) { struct vfsmount *p; @@ -1107,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags) } down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); event++; if (!(flags & MNT_DETACH)) @@ -1119,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags) umount_tree(mnt, 1, &umount_list); retval = 0; } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); return retval; @@ -1231,19 +1270,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, q = clone_mnt(p, p->mnt_root, flag); if (!q) goto Enomem; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(res, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); } return NULL; @@ -1262,9 +1301,9 @@ void drop_collected_mounts(struct vfsmount *mnt) { LIST_HEAD(umount_list); down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(mnt, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1392,7 +1431,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, if (err) goto out_cleanup_ids; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -1411,7 +1450,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, list_del_init(&child->mnt_hash); commit_tree(child); } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); + return 0; out_cleanup_ids: @@ -1466,10 +1506,10 @@ static int do_change_type(struct path *path, int flag) goto out_unlock; } - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); out_unlock: up_write(&namespace_sem); @@ -1513,9 +1553,10 @@ static int do_loopback(struct path *path, char *old_name, err = graft_tree(mnt, path); if (err) { LIST_HEAD(umount_list); - spin_lock(&vfsmount_lock); + + br_write_lock(vfsmount_lock); umount_tree(mnt, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); } @@ -1568,16 +1609,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; path->mnt->mnt_flags = mnt_flags; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } up_write(&sb->s_umount); if (!err) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); touch_mnt_namespace(path->mnt->mnt_ns); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } return err; } @@ -1754,7 +1795,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) return; down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1773,7 +1814,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umounts); @@ -1830,6 +1871,8 @@ resume: /* * process a list of expirable mountpoints with the intent of discarding any * submounts of a specific parent mountpoint + * + * vfsmount_lock must be held for write */ static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) { @@ -2048,9 +2091,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, kfree(new_ns); return ERR_PTR(-ENOMEM); } - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&new_ns->list, &new_ns->root->mnt_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2244,7 +2287,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, goto out2; /* not attached */ /* make sure we can reach put_old from new_root */ tmp = old.mnt; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) @@ -2264,7 +2307,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); chroot_fs_refs(&root, &new); error = 0; path_put(&root_parent); @@ -2279,7 +2322,7 @@ out1: out0: return error; out3: - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); goto out2; } @@ -2326,6 +2369,8 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); + br_lock_init(vfsmount_lock); + err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", @@ -2344,9 +2389,9 @@ void put_mnt_ns(struct mnt_namespace *ns) if (!atomic_dec_and_test(&ns->count)) return; down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(ns->root, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 26a510a..6c2aad4 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -63,7 +63,6 @@ config NFS_V3_ACL config NFS_V4 bool "NFS client support for NFS version 4" depends on NFS_FS - select RPCSEC_GSS_KRB5 help This option enables support for version 4 of the NFS protocol (RFC 3530) in the kernel's NFS client. diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 29539ce..e257172 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -140,6 +140,13 @@ nfs_opendir(struct inode *inode, struct file *filp) /* Call generic open code in order to cache credentials */ res = nfs_open(inode, filp); + if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { + /* This is a mountpoint, so d_revalidate will never + * have been called, so we need to refresh the + * inode (for close-open consistency) ourselves. + */ + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + } return res; } @@ -1103,7 +1110,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) goto no_open_dput; /* We can't create new files, or truncate existing ones here */ - openflags &= ~(O_CREAT|O_TRUNC); + openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); /* * Note: we're not holding inode->i_mutex and so may be racing with diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2d141a7..eb51bd6 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -323,7 +323,7 @@ nfs_file_fsync(struct file *file, int datasync) have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); if (have_error) ret = xchg(&ctx->error, 0); - if (!ret) + if (!ret && status < 0) ret = status; return ret; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ffbb98..089da5b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2036,7 +2036,8 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct rpc_cred *cred; struct nfs4_state *state; struct dentry *res; - fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); + int open_flags = nd->intent.open.flags; + fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; @@ -2044,8 +2045,9 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (!IS_POSIXACL(dir)) attr.ia_mode &= ~current_umask(); } else { + open_flags &= ~O_EXCL; attr.ia_valid = 0; - BUG_ON(nd->intent.open.flags & O_CREAT); + BUG_ON(open_flags & O_CREAT); } cred = rpc_lookup_cred(); @@ -2054,7 +2056,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ nfs_block_sillyrename(parent); - state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred); + state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { if (PTR_ERR(state) == -ENOENT) { @@ -2273,8 +2275,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct out: if (page) __free_page(page); - if (locations) - kfree(locations); + kfree(locations); return status; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ee26316..ec3966e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -655,6 +655,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (nfss->options & NFS_OPTION_FSCACHE) seq_printf(m, ",fsc"); + + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) + seq_printf(m, ",lookupcache=none"); + else + seq_printf(m, ",lookupcache=pos"); + } } /* diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 503b9da..95932f5 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -69,7 +69,6 @@ config NFSD_V4 depends on NFSD && PROC_FS && EXPERIMENTAL select NFSD_V3 select FS_POSIX_ACL - select RPCSEC_GSS_KRB5 help This option enables support in your system's NFS server for version 4 of the NFS protocol (RFC 3530). diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1fa86b9..9222633 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; - int barrier_done = 0; - if (nilfs_test_opt(sbi, BARRIER)) { - set_buffer_ordered(nilfs->ns_sbh[0]); - barrier_done = 1; - } retry: set_buffer_dirty(nilfs->ns_sbh[0]); - err = sync_dirty_buffer(nilfs->ns_sbh[0]); - if (err == -EOPNOTSUPP && barrier_done) { - nilfs_warning(sbi->s_super, __func__, - "barrier-based sync failed. " - "disabling barriers\n"); - nilfs_clear_opt(sbi, BARRIER); - barrier_done = 0; - clear_buffer_ordered(nilfs->ns_sbh[0]); - goto retry; + + if (nilfs_test_opt(sbi, BARRIER)) { + err = __sync_dirty_buffer(nilfs->ns_sbh[0], + WRITE_SYNC | WRITE_BARRIER); + if (err == -EOPNOTSUPP) { + nilfs_warning(sbi->s_super, __func__, + "barrier-based sync failed. " + "disabling barriers\n"); + nilfs_clear_opt(sbi, BARRIER); + goto retry; + } + } else { + err = sync_dirty_buffer(nilfs->ns_sbh[0]); } + if (unlikely(err)) { printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); @@ -400,9 +400,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) list_add(&sbi->s_list, &nilfs->ns_supers); up_write(&nilfs->ns_super_sem); + err = -ENOMEM; sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size); if (!sbi->s_ifile) - return -ENOMEM; + goto delist; down_read(&nilfs->ns_segctor_sem); err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, @@ -433,6 +434,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) nilfs_mdt_destroy(sbi->s_ifile); sbi->s_ifile = NULL; + delist: down_write(&nilfs->ns_super_sem); list_del_init(&sbi->s_list); up_write(&nilfs->ns_super_sem); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 37de1f0..6af1c00 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -608,11 +608,11 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, return -EINVAL; } - if (swp) { + if (!valid[!swp]) printk(KERN_WARNING "NILFS warning: broken superblock. " "using spare superblock.\n"); + if (swp) nilfs_swap_super_block(nilfs); - } nilfs->ns_sbwcount = 0; nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); diff --git a/fs/open.c b/fs/open.c index 630715f..d74e198 100644 --- a/fs/open.c +++ b/fs/open.c @@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_path.mnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); - file_move(f, &inode->i_sb->s_files); + file_sb_list_add(f, inode->i_sb); error = security_dentry_open(f, cred); if (error) @@ -721,7 +721,7 @@ cleanup_all: mnt_drop_write(mnt); } } - file_kill(f); + file_sb_list_del(f); f->f_path.dentry = NULL; f->f_path.mnt = NULL; cleanup_file: diff --git a/fs/pnode.c b/fs/pnode.c index 5cc564a..8066b8d 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -126,6 +126,9 @@ static int do_make_slave(struct vfsmount *mnt) return 0; } +/* + * vfsmount lock must be held for write + */ void change_mnt_propagation(struct vfsmount *mnt, int type) { if (type == MS_SHARED) { @@ -270,12 +273,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, prev_src_mnt = child; } out: - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); umount_tree(child, 0, &umount_list); } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); return ret; } @@ -296,6 +299,8 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count) * other mounts its parent propagates to. * Check if any of these mounts that **do not have submounts** * have more references than 'refcnt'. If so return busy. + * + * vfsmount lock must be held for read or write */ int propagate_mount_busy(struct vfsmount *mnt, int refcnt) { @@ -353,6 +358,8 @@ static void __propagate_umount(struct vfsmount *mnt) * collect all mounts that receive propagation from the mount in @list, * and return these additional mounts in the same list. * @list: the list of mounts to be unmounted. + * + * vfsmount lock must be held for write */ int propagate_umount(struct list_head *list) { diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ae35413..caa7583 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -83,6 +83,7 @@ void reiserfs_evict_inode(struct inode *inode) dquot_drop(inode); inode->i_blocks = 0; reiserfs_write_unlock_once(inode->i_sb, depth); + return; no_delete: end_writeback(inode); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 1ec952b..812e2c0 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb, /* flush out the real blocks */ for (i = 0; i < get_desc_trans_len(desc); i++) { set_buffer_dirty(real_blocks[i]); - ll_rw_block(SWRITE, 1, real_blocks + i); + write_dirty_buffer(real_blocks[i], WRITE); } for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(real_blocks[i]); diff --git a/fs/super.c b/fs/super.c index 9674ab2..8819e3a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type) s = NULL; goto out; } +#ifdef CONFIG_SMP + s->s_files = alloc_percpu(struct list_head); + if (!s->s_files) { + security_sb_free(s); + kfree(s); + s = NULL; + goto out; + } else { + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); + } +#else INIT_LIST_HEAD(&s->s_files); +#endif INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); @@ -108,6 +123,9 @@ out: */ static inline void destroy_super(struct super_block *s) { +#ifdef CONFIG_SMP + free_percpu(s->s_files); +#endif security_sb_free(s); kfree(s->s_subtype); kfree(s->s_options); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 048484f..46f7a80 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -207,10 +205,8 @@ do_more: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); if (overflow) { fragment += count; @@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); @@ -680,10 +674,8 @@ cg_found: succed: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; result += cgno * uspi->s_fpg; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 428017e..2eabf04 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer(UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); UFSD("EXIT\n"); } @@ -290,10 +286,8 @@ cg_found: } ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; inode->i_ino = cg * uspi->s_ipg + bit; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 34d5cb1..a58f915 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) ubh_bforget(ind_ubh); ind_ubh = NULL; } - if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { - ubh_ll_rw_block(SWRITE, ind_ubh); - ubh_wait_on_buffer (ind_ubh); - } + if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) + ubh_sync_block(ind_ubh); ubh_brelse (ind_ubh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_bforget(dind_bh); dind_bh = NULL; } - if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { - ubh_ll_rw_block(SWRITE, dind_bh); - ubh_wait_on_buffer (dind_bh); - } + if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) + ubh_sync_block(dind_bh); ubh_brelse (dind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode) ubh_bforget(tind_bh); tind_bh = NULL; } - if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { - ubh_ll_rw_block(SWRITE, tind_bh); - ubh_wait_on_buffer (tind_bh); - } + if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) + ubh_sync_block(tind_bh); ubh_brelse (tind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 85a7fc9..d2c36d5 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag) } } -void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) +void ubh_sync_block(struct ufs_buffer_head *ubh) { - if (!ubh) - return; + if (ubh) { + unsigned i; - ll_rw_block(rw, ubh->count, ubh->bh); -} + for (i = 0; i < ubh->count; i++) + write_dirty_buffer(ubh->bh[i], WRITE); -void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) -{ - unsigned i; - if (!ubh) - return; - for ( i = 0; i < ubh->count; i++ ) - wait_on_buffer (ubh->bh[i]); + for (i = 0; i < ubh->count; i++) + wait_on_buffer(ubh->bh[i]); + } } void ubh_bforget (struct ufs_buffer_head * ubh) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 0466036..9f8775c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *); extern void ubh_brelse_uspi (struct ufs_sb_private_info *); extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); -extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); -extern void ubh_wait_on_buffer (struct ufs_buffer_head *); +extern void ubh_sync_block(struct ufs_buffer_head *); extern void ubh_bforget (struct ufs_buffer_head *); extern int ubh_buffer_dirty (struct ufs_buffer_head *); #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) diff --git a/include/asm-generic/syscalls.h b/include/asm-generic/syscalls.h index df84e3b..d89dec8 100644 --- a/include/asm-generic/syscalls.h +++ b/include/asm-generic/syscalls.h @@ -23,8 +23,10 @@ asmlinkage long sys_vfork(struct pt_regs *regs); #endif #ifndef sys_execve -asmlinkage long sys_execve(char __user *filename, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs); +asmlinkage long sys_execve(const char __user *filename, + const char __user *const __user *argv, + const char __user *const __user *envp, + struct pt_regs *regs); #endif #ifndef sys_mmap2 diff --git a/include/linux/amba/clcd.h b/include/linux/amba/clcd.h index ca16c38..be33b3a 100644 --- a/include/linux/amba/clcd.h +++ b/include/linux/amba/clcd.h @@ -150,6 +150,7 @@ struct clcd_fb { u16 off_cntl; u32 clcd_cntl; u32 cmap[16]; + bool clk_enabled; }; static inline void clcdfb_decode(struct clcd_fb *fb, struct clcd_regs *regs) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index c809e28..a065612 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -50,8 +50,8 @@ struct linux_binprm{ int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ unsigned int per_clear; /* bits to clear in current->personality */ int argc, envc; - char * filename; /* Name of binary as seen by procps */ - char * interp; /* Name of the binary really executed. Most + const char * filename; /* Name of binary as seen by procps */ + const char * interp; /* Name of the binary really executed. Most of the time same as filename, but could be different for binfmt_{misc,script} */ unsigned interp_flags; @@ -126,7 +126,8 @@ extern int setup_arg_pages(struct linux_binprm * bprm, unsigned long stack_top, int executable_stack); extern int bprm_mm_init(struct linux_binprm *bprm); -extern int copy_strings_kernel(int argc,char ** argv,struct linux_binprm *bprm); +extern int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm); extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void do_coredump(long signr, int exit_code, struct pt_regs *regs); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 43e649a..ec94c12 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Ordered, /* ordered write */ BH_Eopnotsupp, /* operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -125,7 +124,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Ordered, ordered) BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) @@ -183,6 +181,8 @@ void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); +int __sync_dirty_buffer(struct buffer_head *bh, int rw); +void write_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e..3cb7d04 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -75,7 +75,7 @@ struct cgroup_subsys_state { unsigned long flags; /* ID for this css, if possible */ - struct css_id *id; + struct css_id __rcu *id; }; /* bits in struct cgroup_subsys_state flags field */ @@ -205,7 +205,7 @@ struct cgroup { struct list_head children; /* my children */ struct cgroup *parent; /* my parent */ - struct dentry *dentry; /* cgroup fs entry, RCU protected */ + struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ /* Private pointers for each registered subsystem */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c1a62c5..320d6c9 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -16,7 +16,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +#ifdef CONFIG_SPARSE_RCU_POINTER +# define __rcu __attribute__((noderef, address_space(4))) +#else # define __rcu +#endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); #else diff --git a/include/linux/cred.h b/include/linux/cred.h index 4d2c395..4aaeab3 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -84,7 +84,7 @@ struct thread_group_cred { atomic_t usage; pid_t tgid; /* thread group process ID */ spinlock_t lock; - struct key *session_keyring; /* keyring inherited over fork */ + struct key __rcu *session_keyring; /* keyring inherited over fork */ struct key *process_keyring; /* keyring private to this process */ struct rcu_head rcu; /* RCU deletion hook */ }; diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index f59ed29..133c0ba 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -31,7 +31,7 @@ struct embedded_fd_set { struct fdtable { unsigned int max_fds; - struct file ** fd; /* current fd array */ + struct file __rcu **fd; /* current fd array */ fd_set *close_on_exec; fd_set *open_fds; struct rcu_head rcu; @@ -46,7 +46,7 @@ struct files_struct { * read mostly part */ atomic_t count; - struct fdtable *fdt; + struct fdtable __rcu *fdt; struct fdtable fdtab; /* * written part on a separate cache line in SMP @@ -55,7 +55,7 @@ struct files_struct { int next_fd; struct embedded_fd_set close_on_exec_init; struct embedded_fd_set open_fds_init; - struct file * fd_array[NR_OPEN_DEFAULT]; + struct file __rcu * fd_array[NR_OPEN_DEFAULT]; }; #define rcu_dereference_check_fdtable(files, fdtfd) \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a96b4d..aa3dc8d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,9 +125,6 @@ struct inodes_stat_t { * block layer could (in theory) choose to ignore this * request if it runs into resource problems. * WRITE A normal async write. Device will be plugged. - * SWRITE Like WRITE, but a special case for ll_rw_block() that - * tells it to lock the buffer first. Normally a buffer - * must be locked before doing IO. * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down * the hint that someone will be waiting on this IO * shortly. The device must still be unplugged explicitly, @@ -138,9 +135,6 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * SWRITE_SYNC - * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. - * See SWRITE. * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all * previously submitted writes must be safely on storage * before this one is started. Also guarantees that when @@ -155,7 +149,6 @@ struct inodes_stat_t { #define READ 0 #define WRITE RW_MASK #define READA RWA_MASK -#define SWRITE (WRITE | READA) #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) #define READ_META (READ | REQ_META) @@ -165,8 +158,6 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) -#define SWRITE_SYNC_PLUG (SWRITE | REQ_SYNC | REQ_NOIDLE) -#define SWRITE_SYNC (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) /* * These aren't really reads or writes, they pass down information about @@ -929,6 +920,9 @@ struct file { #define f_vfsmnt f_path.mnt const struct file_operations *f_op; spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ +#ifdef CONFIG_SMP + int f_sb_list_cpu; +#endif atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; @@ -953,9 +947,6 @@ struct file { unsigned long f_mnt_write_state; #endif }; -extern spinlock_t files_lock; -#define file_list_lock() spin_lock(&files_lock); -#define file_list_unlock() spin_unlock(&files_lock); #define get_file(x) atomic_long_inc(&(x)->f_count) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) @@ -1346,7 +1337,11 @@ struct super_block { struct list_head s_inodes; /* all inodes */ struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_SMP + struct list_head __percpu *s_files; +#else struct list_head s_files; +#endif /* s_dentry_lru and s_nr_dentry_unused are protected by dcache_lock */ struct list_head s_dentry_lru; /* unused dentry lru */ int s_nr_dentry_unused; /* # of dentry on lru */ @@ -1385,7 +1380,7 @@ struct super_block { * Saved mount options for lazy filesystems using * generic_show_options() */ - char *s_options; + char __rcu *s_options; }; extern struct timespec current_fs_time(struct super_block *sb); @@ -2197,8 +2192,6 @@ static inline void insert_inode_hash(struct inode *inode) { __insert_inode_hash(inode, inode->i_ino); } -extern void file_move(struct file *f, struct list_head *list); -extern void file_kill(struct file *f); #ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index eca3d52..a42b5bf 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -5,7 +5,7 @@ struct fs_struct { int users; - rwlock_t lock; + spinlock_t lock; int umask; int in_exec; struct path root, pwd; @@ -23,29 +23,29 @@ extern int unshare_fs_struct(void); static inline void get_fs_root(struct fs_struct *fs, struct path *root) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *root = fs->root; path_get(root); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *pwd = fs->pwd; path_get(pwd); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root, struct path *pwd) { - read_lock(&fs->lock); + spin_lock(&fs->lock); *root = fs->root; path_get(root); *pwd = fs->pwd; path_get(pwd); - read_unlock(&fs->lock); + spin_unlock(&fs->lock); } #endif /* _LINUX_FS_STRUCT_H */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5f2f4c4..af3f06b 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -129,8 +129,8 @@ struct blk_scsi_cmd_filter { struct disk_part_tbl { struct rcu_head rcu_head; int len; - struct hd_struct *last_lookup; - struct hd_struct *part[]; + struct hd_struct __rcu *last_lookup; + struct hd_struct __rcu *part[]; }; struct gendisk { @@ -149,7 +149,7 @@ struct gendisk { * non-critical accesses use RCU. Always access through * helpers. */ - struct disk_part_tbl *part_tbl; + struct disk_part_tbl __rcu *part_tbl; struct hd_struct part0; const struct block_device_operations *fops; diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index d5b3876..1f4517d 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -139,7 +139,7 @@ static inline void account_system_vtime(struct task_struct *tsk) #endif #if defined(CONFIG_NO_HZ) -#if defined(CONFIG_TINY_RCU) +#if defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) extern void rcu_enter_nohz(void); extern void rcu_exit_nohz(void); diff --git a/include/linux/idr.h b/include/linux/idr.h index e968db7..cdb715e 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -50,14 +50,14 @@ struct idr_layer { unsigned long bitmap; /* A zero bit means "space here" */ - struct idr_layer *ary[1< +#include +#include + +/* can make br locks by using local lock for read side, global lock for write */ +#define br_lock_init(name) name##_lock_init() +#define br_read_lock(name) name##_local_lock() +#define br_read_unlock(name) name##_local_unlock() +#define br_write_lock(name) name##_global_lock_online() +#define br_write_unlock(name) name##_global_unlock_online() + +#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) +#define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) + + +#define lg_lock_init(name) name##_lock_init() +#define lg_local_lock(name) name##_local_lock() +#define lg_local_unlock(name) name##_local_unlock() +#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) +#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) +#define lg_global_lock(name) name##_global_lock() +#define lg_global_unlock(name) name##_global_unlock() +#define lg_global_lock_online(name) name##_global_lock_online() +#define lg_global_unlock_online(name) name##_global_unlock_online() + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define LOCKDEP_INIT_MAP lockdep_init_map + +#define DEFINE_LGLOCK_LOCKDEP(name) \ + struct lock_class_key name##_lock_key; \ + struct lockdep_map name##_lock_dep_map; \ + EXPORT_SYMBOL(name##_lock_dep_map) + +#else +#define LOCKDEP_INIT_MAP(a, b, c, d) + +#define DEFINE_LGLOCK_LOCKDEP(name) +#endif + + +#define DECLARE_LGLOCK(name) \ + extern void name##_lock_init(void); \ + extern void name##_local_lock(void); \ + extern void name##_local_unlock(void); \ + extern void name##_local_lock_cpu(int cpu); \ + extern void name##_local_unlock_cpu(int cpu); \ + extern void name##_global_lock(void); \ + extern void name##_global_unlock(void); \ + extern void name##_global_lock_online(void); \ + extern void name##_global_unlock_online(void); \ + +#define DEFINE_LGLOCK(name) \ + \ + DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ + DEFINE_LGLOCK_LOCKDEP(name); \ + \ + void name##_lock_init(void) { \ + int i; \ + LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ + for_each_possible_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ + } \ + } \ + EXPORT_SYMBOL(name##_lock_init); \ + \ + void name##_local_lock(void) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock); \ + \ + void name##_local_unlock(void) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &__get_cpu_var(name##_lock); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock); \ + \ + void name##_local_lock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + preempt_disable(); \ + rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_lock(lock); \ + } \ + EXPORT_SYMBOL(name##_local_lock_cpu); \ + \ + void name##_local_unlock_cpu(int cpu) { \ + arch_spinlock_t *lock; \ + rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ + lock = &per_cpu(name##_lock, cpu); \ + arch_spin_unlock(lock); \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_local_unlock_cpu); \ + \ + void name##_global_lock_online(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock_online); \ + \ + void name##_global_unlock_online(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_global_unlock_online); \ + \ + void name##_global_lock(void) { \ + int i; \ + preempt_disable(); \ + rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_lock(lock); \ + } \ + } \ + EXPORT_SYMBOL(name##_global_lock); \ + \ + void name##_global_unlock(void) { \ + int i; \ + rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ + for_each_online_cpu(i) { \ + arch_spinlock_t *lock; \ + lock = &per_cpu(name##_lock, i); \ + arch_spin_unlock(lock); \ + } \ + preempt_enable(); \ + } \ + EXPORT_SYMBOL(name##_global_unlock); +#endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index b8bb9a6..05537a5 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -299,7 +299,7 @@ struct mm_struct { * new_owner->mm == mm * new_owner->alloc_lock is held */ - struct task_struct *owner; + struct task_struct __rcu *owner; #endif #ifdef CONFIG_PROC_FS diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 508f8cf..d0edf7d 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -185,7 +185,7 @@ struct nfs_inode { struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ struct list_head open_states; - struct nfs_delegation *delegation; + struct nfs_delegation __rcu *delegation; fmode_t delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ diff --git a/include/linux/notifier.h b/include/linux/notifier.h index b2f1a4d..2026f9e 100644 --- a/include/linux/notifier.h +++ b/include/linux/notifier.h @@ -49,28 +49,28 @@ struct notifier_block { int (*notifier_call)(struct notifier_block *, unsigned long, void *); - struct notifier_block *next; + struct notifier_block __rcu *next; int priority; }; struct atomic_notifier_head { spinlock_t lock; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct blocking_notifier_head { struct rw_semaphore rwsem; - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct raw_notifier_head { - struct notifier_block *head; + struct notifier_block __rcu *head; }; struct srcu_notifier_head { struct mutex mutex; struct srcu_struct srcu; - struct notifier_block *head; + struct notifier_block __rcu *head; }; #define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 634b8e6..a39cbed 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -47,6 +47,8 @@ static inline void *radix_tree_indirect_to_ptr(void *ptr) { return (void *)((unsigned long)ptr & ~RADIX_TREE_INDIRECT_PTR); } +#define radix_tree_indirect_to_ptr(ptr) \ + radix_tree_indirect_to_ptr((void __force *)(ptr)) static inline int radix_tree_is_indirect_ptr(void *ptr) { @@ -61,7 +63,7 @@ static inline int radix_tree_is_indirect_ptr(void *ptr) struct radix_tree_root { unsigned int height; gfp_t gfp_mask; - struct radix_tree_node *rnode; + struct radix_tree_node __rcu *rnode; }; #define RADIX_TREE_INIT(mask) { \ diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4ec3b38..c10b105 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -10,6 +10,12 @@ #include /* + * return the ->next pointer of a list_head in an rcu safe + * way, we must not access it directly + */ +#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) + +/* * Insert a new entry between two known consecutive entries. * * This is only for internal list manipulation where we know @@ -20,7 +26,7 @@ static inline void __list_add_rcu(struct list_head *new, { new->next = next; new->prev = prev; - rcu_assign_pointer(prev->next, new); + rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } @@ -138,7 +144,7 @@ static inline void list_replace_rcu(struct list_head *old, { new->next = old->next; new->prev = old->prev; - rcu_assign_pointer(new->prev->next, new); + rcu_assign_pointer(list_next_rcu(new->prev), new); new->next->prev = new; old->prev = LIST_POISON2; } @@ -193,7 +199,7 @@ static inline void list_splice_init_rcu(struct list_head *list, */ last->next = at; - rcu_assign_pointer(head->next, first); + rcu_assign_pointer(list_next_rcu(head), first); first->prev = head; at->prev = last; } @@ -208,7 +214,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ - container_of(rcu_dereference_raw(ptr), type, member) + ({typeof (*ptr) __rcu *__ptr = (typeof (*ptr) __rcu __force *)ptr; \ + container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ + }) /** * list_first_entry_rcu - get the first element from a list @@ -225,9 +233,9 @@ static inline void list_splice_init_rcu(struct list_head *list, list_entry_rcu((ptr)->next, type, member) #define __list_for_each_rcu(pos, head) \ - for (pos = rcu_dereference_raw((head)->next); \ + for (pos = rcu_dereference_raw(list_next_rcu(head)); \ pos != (head); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(list_next_rcu((pos))) /** * list_for_each_entry_rcu - iterate over rcu list of given type @@ -257,9 +265,9 @@ static inline void list_splice_init_rcu(struct list_head *list, * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_continue_rcu(pos, head) \ - for ((pos) = rcu_dereference_raw((pos)->next); \ + for ((pos) = rcu_dereference_raw(list_next_rcu(pos)); \ prefetch((pos)->next), (pos) != (head); \ - (pos) = rcu_dereference_raw((pos)->next)) + (pos) = rcu_dereference_raw(list_next_rcu(pos))) /** * list_for_each_entry_continue_rcu - continue iteration over list of given type @@ -314,12 +322,19 @@ static inline void hlist_replace_rcu(struct hlist_node *old, new->next = next; new->pprev = old->pprev; - rcu_assign_pointer(*new->pprev, new); + rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); if (next) new->next->pprev = &new->next; old->pprev = LIST_POISON2; } +/* + * return the first or the next element in an RCU protected hlist + */ +#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) +#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) +#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) + /** * hlist_add_head_rcu * @n: the element to add to the hash list. @@ -346,7 +361,7 @@ static inline void hlist_add_head_rcu(struct hlist_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_first_rcu(h), n); if (first) first->pprev = &n->next; } @@ -374,7 +389,7 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, { n->pprev = next->pprev; n->next = next; - rcu_assign_pointer(*(n->pprev), n); + rcu_assign_pointer(hlist_pprev_rcu(n), n); next->pprev = &n->next; } @@ -401,15 +416,15 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, { n->next = prev->next; n->pprev = &prev->next; - rcu_assign_pointer(prev->next, n); + rcu_assign_pointer(hlist_next_rcu(prev), n); if (n->next) n->next->pprev = &n->next; } -#define __hlist_for_each_rcu(pos, head) \ - for (pos = rcu_dereference((head)->first); \ - pos && ({ prefetch(pos->next); 1; }); \ - pos = rcu_dereference(pos->next)) +#define __hlist_for_each_rcu(pos, head) \ + for (pos = rcu_dereference(hlist_first_rcu(head)); \ + pos && ({ prefetch(pos->next); 1; }); \ + pos = rcu_dereference(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu - iterate over rcu list of given type @@ -422,11 +437,11 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev, * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ -#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ +#define hlist_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_first_rcu(head)); \ pos && ({ prefetch(pos->next); 1; }) && \ ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index b70ffe5..2ae1371 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -37,6 +37,12 @@ static inline void hlist_nulls_del_init_rcu(struct hlist_nulls_node *n) } } +#define hlist_nulls_first_rcu(head) \ + (*((struct hlist_nulls_node __rcu __force **)&(head)->first)) + +#define hlist_nulls_next_rcu(node) \ + (*((struct hlist_nulls_node __rcu __force **)&(node)->next)) + /** * hlist_nulls_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. @@ -88,7 +94,7 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, n->next = first; n->pprev = &h->first; - rcu_assign_pointer(h->first, n); + rcu_assign_pointer(hlist_nulls_first_rcu(h), n); if (!is_a_nulls(first)) first->pprev = &n->next; } @@ -100,11 +106,11 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, * @member: the name of the hlist_nulls_node within the struct. * */ -#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ - for (pos = rcu_dereference_raw((head)->first); \ - (!is_a_nulls(pos)) && \ +#define hlist_nulls_for_each_entry_rcu(tpos, pos, head, member) \ + for (pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \ + (!is_a_nulls(pos)) && \ ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \ - pos = rcu_dereference_raw(pos->next)) + pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos))) #endif #endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9fbc54a..89414d6 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -41,11 +41,15 @@ #include #include #include +#include #ifdef CONFIG_RCU_TORTURE_TEST extern int rcutorture_runnable; /* for sysctl */ #endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ +#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) +#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) + /** * struct rcu_head - callback structure for use with RCU * @next: next update requests in a list @@ -57,29 +61,94 @@ struct rcu_head { }; /* Exported common interfaces */ -extern void rcu_barrier(void); +extern void call_rcu_sched(struct rcu_head *head, + void (*func)(struct rcu_head *rcu)); +extern void synchronize_sched(void); extern void rcu_barrier_bh(void); extern void rcu_barrier_sched(void); extern void synchronize_sched_expedited(void); extern int sched_expedited_torture_stats(char *page); +static inline void __rcu_read_lock_bh(void) +{ + local_bh_disable(); +} + +static inline void __rcu_read_unlock_bh(void) +{ + local_bh_enable(); +} + +#ifdef CONFIG_PREEMPT_RCU + +extern void __rcu_read_lock(void); +extern void __rcu_read_unlock(void); +void synchronize_rcu(void); + +/* + * Defined as a macro as it is a very low level header included from + * areas that don't even know about current. This gives the rcu_read_lock() + * nesting depth, but makes sense only if CONFIG_PREEMPT_RCU -- in other + * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. + */ +#define rcu_preempt_depth() (current->rcu_read_lock_nesting) + +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +static inline void __rcu_read_lock(void) +{ + preempt_disable(); +} + +static inline void __rcu_read_unlock(void) +{ + preempt_enable(); +} + +static inline void synchronize_rcu(void) +{ + synchronize_sched(); +} + +static inline int rcu_preempt_depth(void) +{ + return 0; +} + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + /* Internal to kernel */ extern void rcu_init(void); +extern void rcu_sched_qs(int cpu); +extern void rcu_bh_qs(int cpu); +extern void rcu_check_callbacks(int cpu, int user); +struct notifier_block; + +#ifdef CONFIG_NO_HZ + +extern void rcu_enter_nohz(void); +extern void rcu_exit_nohz(void); + +#else /* #ifdef CONFIG_NO_HZ */ + +static inline void rcu_enter_nohz(void) +{ +} + +static inline void rcu_exit_nohz(void) +{ +} + +#endif /* #else #ifdef CONFIG_NO_HZ */ #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) #include -#elif defined(CONFIG_TINY_RCU) +#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) #include #else #error "Unknown RCU implementation specified to kernel configuration" #endif -#define RCU_HEAD_INIT { .next = NULL, .func = NULL } -#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT -#define INIT_RCU_HEAD(ptr) do { \ - (ptr)->next = NULL; (ptr)->func = NULL; \ -} while (0) - /* * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic * initialization and destruction of rcu_head on the stack. rcu_head structures @@ -120,14 +189,15 @@ extern struct lockdep_map rcu_sched_lock_map; extern int debug_lockdep_rcu_enabled(void); /** - * rcu_read_lock_held - might we be in RCU read-side critical section? + * rcu_read_lock_held() - might we be in RCU read-side critical section? * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, * this assumes we are in an RCU read-side critical section unless it can - * prove otherwise. + * prove otherwise. This is useful for debug checks in functions that + * require that they be called within an RCU read-side critical section. * - * Check debug_lockdep_rcu_enabled() to prevent false positives during boot + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. */ static inline int rcu_read_lock_held(void) @@ -144,14 +214,16 @@ static inline int rcu_read_lock_held(void) extern int rcu_read_lock_bh_held(void); /** - * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section? + * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section? * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an * RCU-sched read-side critical section. In absence of * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side * critical section unless it can prove otherwise. Note that disabling * of preemption (including disabling irqs) counts as an RCU-sched - * read-side critical section. + * read-side critical section. This is useful for debug checks in functions + * that required that they be called within an RCU-sched read-side + * critical section. * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot * and while lockdep is disabled. @@ -211,7 +283,11 @@ static inline int rcu_read_lock_sched_held(void) extern int rcu_my_thread_group_empty(void); -#define __do_rcu_dereference_check(c) \ +/** + * rcu_lockdep_assert - emit lockdep splat if specified condition not met + * @c: condition to check + */ +#define rcu_lockdep_assert(c) \ do { \ static bool __warned; \ if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ @@ -220,41 +296,155 @@ extern int rcu_my_thread_group_empty(void); } \ } while (0) +#else /* #ifdef CONFIG_PROVE_RCU */ + +#define rcu_lockdep_assert(c) do { } while (0) + +#endif /* #else #ifdef CONFIG_PROVE_RCU */ + +/* + * Helper functions for rcu_dereference_check(), rcu_dereference_protected() + * and rcu_assign_pointer(). Some of these could be folded into their + * callers, but they are left separate in order to ease introduction of + * multiple flavors of pointers to match the multiple flavors of RCU + * (e.g., __rcu_bh, * __rcu_sched, and __srcu), should this make sense in + * the future. + */ +#define __rcu_access_pointer(p, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + (void) (((typeof (*p) space *)p) == p); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_check(p, c, space) \ + ({ \ + typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ + rcu_lockdep_assert(c); \ + (void) (((typeof (*p) space *)p) == p); \ + smp_read_barrier_depends(); \ + ((typeof(*p) __force __kernel *)(_________p1)); \ + }) +#define __rcu_dereference_protected(p, c, space) \ + ({ \ + rcu_lockdep_assert(c); \ + (void) (((typeof (*p) space *)p) == p); \ + ((typeof(*p) __force __kernel *)(p)); \ + }) + +#define __rcu_dereference_index_check(p, c) \ + ({ \ + typeof(p) _________p1 = ACCESS_ONCE(p); \ + rcu_lockdep_assert(c); \ + smp_read_barrier_depends(); \ + (_________p1); \ + }) +#define __rcu_assign_pointer(p, v, space) \ + ({ \ + if (!__builtin_constant_p(v) || \ + ((v) != NULL)) \ + smp_wmb(); \ + (p) = (typeof(*v) __force space *)(v); \ + }) + + /** - * rcu_dereference_check - rcu_dereference with debug checking + * rcu_access_pointer() - fetch RCU pointer with no dereferencing + * @p: The pointer to read + * + * Return the value of the specified RCU-protected pointer, but omit the + * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful + * when the value of this pointer is accessed, but the pointer is not + * dereferenced, for example, when testing an RCU-protected pointer against + * NULL. Although rcu_access_pointer() may also be used in cases where + * update-side locks prevent the value of the pointer from changing, you + * should instead use rcu_dereference_protected() for this use case. + */ +#define rcu_access_pointer(p) __rcu_access_pointer((p), __rcu) + +/** + * rcu_dereference_check() - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * @c: The conditions under which the dereference will take place * * Do an rcu_dereference(), but check that the conditions under which the - * dereference will take place are correct. Typically the conditions indicate - * the various locking conditions that should be held at that point. The check - * should return true if the conditions are satisfied. + * dereference will take place are correct. Typically the conditions + * indicate the various locking conditions that should be held at that + * point. The check should return true if the conditions are satisfied. + * An implicit check for being in an RCU read-side critical section + * (rcu_read_lock()) is included. * * For example: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock)); + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock)); * * could be used to indicate to lockdep that foo->bar may only be dereferenced - * if either the RCU read lock is held, or that the lock required to replace + * if either rcu_read_lock() is held, or that the lock required to replace * the bar struct at foo->bar is held. * * Note that the list of conditions may also include indications of when a lock * need not be held, for example during initialisation or destruction of the * target struct: * - * bar = rcu_dereference_check(foo->bar, rcu_read_lock_held() || - * lockdep_is_held(&foo->lock) || + * bar = rcu_dereference_check(foo->bar, lockdep_is_held(&foo->lock) || * atomic_read(&foo->usage) == 0); + * + * Inserts memory barriers on architectures that require them + * (currently only the Alpha), prevents the compiler from refetching + * (and from merging fetches), and, more importantly, documents exactly + * which pointers are protected by RCU and checks that the pointer is + * annotated as __rcu. */ #define rcu_dereference_check(p, c) \ - ({ \ - __do_rcu_dereference_check(c); \ - rcu_dereference_raw(p); \ - }) + __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) /** - * rcu_dereference_protected - fetch RCU pointer when updates prevented + * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_bh_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) + +/** + * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_check(). + */ +#define rcu_dereference_sched_check(p, c) \ + __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ + __rcu) + +#define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ + +/** + * rcu_dereference_index_check() - rcu_dereference for indices with debug checking + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * Similar to rcu_dereference_check(), but omits the sparse checking. + * This allows rcu_dereference_index_check() to be used on integers, + * which can then be used as array indices. Attempting to use + * rcu_dereference_check() on an integer will give compiler warnings + * because the sparse address-space mechanism relies on dereferencing + * the RCU-protected pointer. Dereferencing integers is not something + * that even gcc will put up with. + * + * Note that this function does not implicitly check for RCU read-side + * critical sections. If this function gains lots of uses, it might + * make sense to provide versions for each flavor of RCU, but it does + * not make sense as of early 2010. + */ +#define rcu_dereference_index_check(p, c) \ + __rcu_dereference_index_check((p), (c)) + +/** + * rcu_dereference_protected() - fetch RCU pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place * * Return the value of the specified RCU-protected pointer, but omit * both the smp_read_barrier_depends() and the ACCESS_ONCE(). This @@ -263,35 +453,61 @@ extern int rcu_my_thread_group_empty(void); * prevent the compiler from repeating this reference or combining it * with other references, so it should not be used without protection * of appropriate locks. + * + * This function is only for update-side use. Using this function + * when protected only by rcu_read_lock() will result in infrequent + * but very ugly failures. */ #define rcu_dereference_protected(p, c) \ - ({ \ - __do_rcu_dereference_check(c); \ - (p); \ - }) + __rcu_dereference_protected((p), (c), __rcu) -#else /* #ifdef CONFIG_PROVE_RCU */ +/** + * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-bh counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_bh_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#define rcu_dereference_check(p, c) rcu_dereference_raw(p) -#define rcu_dereference_protected(p, c) (p) +/** + * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented + * @p: The pointer to read, prior to dereferencing + * @c: The conditions under which the dereference will take place + * + * This is the RCU-sched counterpart to rcu_dereference_protected(). + */ +#define rcu_dereference_sched_protected(p, c) \ + __rcu_dereference_protected((p), (c), __rcu) -#endif /* #else #ifdef CONFIG_PROVE_RCU */ /** - * rcu_access_pointer - fetch RCU pointer with no dereferencing + * rcu_dereference() - fetch RCU-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing * - * Return the value of the specified RCU-protected pointer, but omit the - * smp_read_barrier_depends() and keep the ACCESS_ONCE(). This is useful - * when the value of this pointer is accessed, but the pointer is not - * dereferenced, for example, when testing an RCU-protected pointer against - * NULL. This may also be used in cases where update-side locks prevent - * the value of the pointer from changing, but rcu_dereference_protected() - * is a lighter-weight primitive for this use case. + * This is a simple wrapper around rcu_dereference_check(). + */ +#define rcu_dereference(p) rcu_dereference_check(p, 0) + +/** + * rcu_dereference_bh() - fetch an RCU-bh-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. + */ +#define rcu_dereference_bh(p) rcu_dereference_bh_check(p, 0) + +/** + * rcu_dereference_sched() - fetch RCU-sched-protected pointer for dereferencing + * @p: The pointer to read, prior to dereferencing + * + * Makes rcu_dereference_check() do the dirty work. */ -#define rcu_access_pointer(p) ACCESS_ONCE(p) +#define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) /** - * rcu_read_lock - mark the beginning of an RCU read-side critical section. + * rcu_read_lock() - mark the beginning of an RCU read-side critical section * * When synchronize_rcu() is invoked on one CPU while other CPUs * are within RCU read-side critical sections, then the @@ -302,7 +518,7 @@ extern int rcu_my_thread_group_empty(void); * until after the all the other CPUs exit their critical sections. * * Note, however, that RCU callbacks are permitted to run concurrently - * with RCU read-side critical sections. One way that this can happen + * with new RCU read-side critical sections. One way that this can happen * is via the following sequence of events: (1) CPU 0 enters an RCU * read-side critical section, (2) CPU 1 invokes call_rcu() to register * an RCU callback, (3) CPU 0 exits the RCU read-side critical section, @@ -317,7 +533,20 @@ extern int rcu_my_thread_group_empty(void); * will be deferred until the outermost RCU read-side critical section * completes. * - * It is illegal to block while in an RCU read-side critical section. + * You can avoid reading and understanding the next paragraph by + * following this rule: don't put anything in an rcu_read_lock() RCU + * read-side critical section that would block in a !PREEMPT kernel. + * But if you want the full story, read on! + * + * In non-preemptible RCU implementations (TREE_RCU and TINY_RCU), it + * is illegal to block while in an RCU read-side critical section. In + * preemptible RCU implementations (TREE_PREEMPT_RCU and TINY_PREEMPT_RCU) + * in CONFIG_PREEMPT kernel builds, RCU read-side critical sections may + * be preempted, but explicit blocking is illegal. Finally, in preemptible + * RCU implementations in real-time (CONFIG_PREEMPT_RT) kernel builds, + * RCU read-side critical sections may be preempted and they may also + * block, but only when acquiring spinlocks that are subject to priority + * inheritance. */ static inline void rcu_read_lock(void) { @@ -337,7 +566,7 @@ static inline void rcu_read_lock(void) */ /** - * rcu_read_unlock - marks the end of an RCU read-side critical section. + * rcu_read_unlock() - marks the end of an RCU read-side critical section. * * See rcu_read_lock() for more information. */ @@ -349,15 +578,16 @@ static inline void rcu_read_unlock(void) } /** - * rcu_read_lock_bh - mark the beginning of a softirq-only RCU critical section + * rcu_read_lock_bh() - mark the beginning of an RCU-bh critical section * * This is equivalent of rcu_read_lock(), but to be used when updates - * are being done using call_rcu_bh(). Since call_rcu_bh() callbacks - * consider completion of a softirq handler to be a quiescent state, - * a process in RCU read-side critical section must be protected by - * disabling softirqs. Read-side critical sections in interrupt context - * can use just rcu_read_lock(). - * + * are being done using call_rcu_bh() or synchronize_rcu_bh(). Since + * both call_rcu_bh() and synchronize_rcu_bh() consider completion of a + * softirq handler to be a quiescent state, a process in RCU read-side + * critical section must be protected by disabling softirqs. Read-side + * critical sections in interrupt context can use just rcu_read_lock(), + * though this should at least be commented to avoid confusing people + * reading the code. */ static inline void rcu_read_lock_bh(void) { @@ -379,13 +609,12 @@ static inline void rcu_read_unlock_bh(void) } /** - * rcu_read_lock_sched - mark the beginning of a RCU-classic critical section + * rcu_read_lock_sched() - mark the beginning of a RCU-sched critical section * - * Should be used with either - * - synchronize_sched() - * or - * - call_rcu_sched() and rcu_barrier_sched() - * on the write-side to insure proper synchronization. + * This is equivalent of rcu_read_lock(), but to be used when updates + * are being done using call_rcu_sched() or synchronize_rcu_sched(). + * Read-side critical sections can also be introduced by anything that + * disables preemption, including local_irq_disable() and friends. */ static inline void rcu_read_lock_sched(void) { @@ -420,54 +649,14 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) preempt_enable_notrace(); } - -/** - * rcu_dereference_raw - fetch an RCU-protected pointer - * - * The caller must be within some flavor of RCU read-side critical - * section, or must be otherwise preventing the pointer from changing, - * for example, by holding an appropriate lock. This pointer may later - * be safely dereferenced. It is the caller's responsibility to have - * done the right thing, as this primitive does no checking of any kind. - * - * Inserts memory barriers on architectures that require them - * (currently only the Alpha), and, more importantly, documents - * exactly which pointers are protected by RCU. - */ -#define rcu_dereference_raw(p) ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference - fetch an RCU-protected pointer, checking for RCU - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference(p) \ - rcu_dereference_check(p, rcu_read_lock_held()) - /** - * rcu_dereference_bh - fetch an RCU-protected pointer, checking for RCU-bh + * rcu_assign_pointer() - assign to RCU-protected pointer + * @p: pointer to assign to + * @v: value to assign (publish) * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_bh(p) \ - rcu_dereference_check(p, rcu_read_lock_bh_held()) - -/** - * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched - * - * Makes rcu_dereference_check() do the dirty work. - */ -#define rcu_dereference_sched(p) \ - rcu_dereference_check(p, rcu_read_lock_sched_held()) - -/** - * rcu_assign_pointer - assign (publicize) a pointer to a newly - * initialized structure that will be dereferenced by RCU read-side - * critical sections. Returns the value assigned. + * Assigns the specified value to the specified RCU-protected + * pointer, ensuring that any concurrent RCU readers will see + * any prior initialization. Returns the value assigned. * * Inserts memory barriers on architectures that require them * (pretty much all of them other than x86), and also prevents @@ -476,14 +665,17 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * call documents which pointers will be dereferenced by RCU read-side * code. */ - #define rcu_assign_pointer(p, v) \ - ({ \ - if (!__builtin_constant_p(v) || \ - ((v) != NULL)) \ - smp_wmb(); \ - (p) = (v); \ - }) + __rcu_assign_pointer((p), (v), __rcu) + +/** + * RCU_INIT_POINTER() - initialize an RCU protected pointer + * + * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep + * splats. + */ +#define RCU_INIT_POINTER(p, v) \ + p = (typeof(*v) __force __rcu *)(v) /* Infrastructure to implement the synchronize_() primitives. */ @@ -494,26 +686,37 @@ struct rcu_synchronize { extern void wakeme_after_rcu(struct rcu_head *head); +#ifdef CONFIG_PREEMPT_RCU + /** - * call_rcu - Queue an RCU callback for invocation after a grace period. + * call_rcu() - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * @func: actual callback function to be invoked after the grace period * - * The update function will be invoked some time after a full grace - * period elapses, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all pre-existing RCU read-side + * critical sections have completed. However, the callback function + * might well execute concurrently with RCU read-side critical sections + * that started after call_rcu() was invoked. RCU read-side critical * sections are delimited by rcu_read_lock() and rcu_read_unlock(), * and may be nested. */ extern void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head)); +#else /* #ifdef CONFIG_PREEMPT_RCU */ + +/* In classic RCU, call_rcu() is just call_rcu_sched(). */ +#define call_rcu call_rcu_sched + +#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ + /** - * call_rcu_bh - Queue an RCU for invocation after a quicker grace period. + * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period. * @head: structure to be used for queueing the RCU updates. - * @func: actual update function to be invoked after the grace period + * @func: actual callback function to be invoked after the grace period * - * The update function will be invoked some time after a full grace + * The callback function will be invoked some time after a full grace * period elapses, in other words after all currently executing RCU * read-side critical sections have completed. call_rcu_bh() assumes * that the read-side critical sections end on completion of a softirq @@ -566,37 +769,4 @@ static inline void debug_rcu_head_unqueue(struct rcu_head *head) } #endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ -#ifndef CONFIG_PROVE_RCU -#define __do_rcu_dereference_check(c) do { } while (0) -#endif /* #ifdef CONFIG_PROVE_RCU */ - -#define __rcu_dereference_index_check(p, c) \ - ({ \ - typeof(p) _________p1 = ACCESS_ONCE(p); \ - __do_rcu_dereference_check(c); \ - smp_read_barrier_depends(); \ - (_________p1); \ - }) - -/** - * rcu_dereference_index_check() - rcu_dereference for indices with debug checking - * @p: The pointer to read, prior to dereferencing - * @c: The conditions under which the dereference will take place - * - * Similar to rcu_dereference_check(), but omits the sparse checking. - * This allows rcu_dereference_index_check() to be used on integers, - * which can then be used as array indices. Attempting to use - * rcu_dereference_check() on an integer will give compiler warnings - * because the sparse address-space mechanism relies on dereferencing - * the RCU-protected pointer. Dereferencing integers is not something - * that even gcc will put up with. - * - * Note that this function does not implicitly check for RCU read-side - * critical sections. If this function gains lots of uses, it might - * make sense to provide versions for each flavor of RCU, but it does - * not make sense as of early 2010. - */ -#define rcu_dereference_index_check(p, c) \ - __rcu_dereference_index_check((p), (c)) - #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index e2e8931..13877cb 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -27,103 +27,101 @@ #include -void rcu_sched_qs(int cpu); -void rcu_bh_qs(int cpu); -static inline void rcu_note_context_switch(int cpu) -{ - rcu_sched_qs(cpu); -} +#define rcu_init_sched() do { } while (0) -#define __rcu_read_lock() preempt_disable() -#define __rcu_read_unlock() preempt_enable() -#define __rcu_read_lock_bh() local_bh_disable() -#define __rcu_read_unlock_bh() local_bh_enable() -#define call_rcu_sched call_rcu +#ifdef CONFIG_TINY_RCU -#define rcu_init_sched() do { } while (0) -extern void rcu_check_callbacks(int cpu, int user); +static inline void synchronize_rcu_expedited(void) +{ + synchronize_sched(); /* Only one CPU, so pretty fast anyway!!! */ +} -static inline int rcu_needs_cpu(int cpu) +static inline void rcu_barrier(void) { - return 0; + rcu_barrier_sched(); /* Only one CPU, so only one list of callbacks! */ } -/* - * Return the number of grace periods. - */ -static inline long rcu_batches_completed(void) +#else /* #ifdef CONFIG_TINY_RCU */ + +void rcu_barrier(void); +void synchronize_rcu_expedited(void); + +#endif /* #else #ifdef CONFIG_TINY_RCU */ + +static inline void synchronize_rcu_bh(void) { - return 0; + synchronize_sched(); } -/* - * Return the number of bottom-half grace periods. - */ -static inline long rcu_batches_completed_bh(void) +static inline void synchronize_rcu_bh_expedited(void) { - return 0; + synchronize_sched(); } -static inline void rcu_force_quiescent_state(void) +#ifdef CONFIG_TINY_RCU + +static inline void rcu_preempt_note_context_switch(void) { } -static inline void rcu_bh_force_quiescent_state(void) +static inline void exit_rcu(void) { } -static inline void rcu_sched_force_quiescent_state(void) +static inline int rcu_needs_cpu(int cpu) { + return 0; } -extern void synchronize_sched(void); +#else /* #ifdef CONFIG_TINY_RCU */ + +void rcu_preempt_note_context_switch(void); +extern void exit_rcu(void); +int rcu_preempt_needs_cpu(void); -static inline void synchronize_rcu(void) +static inline int rcu_needs_cpu(int cpu) { - synchronize_sched(); + return rcu_preempt_needs_cpu(); } -static inline void synchronize_rcu_bh(void) +#endif /* #else #ifdef CONFIG_TINY_RCU */ + +static inline void rcu_note_context_switch(int cpu) { - synchronize_sched(); + rcu_sched_qs(cpu); + rcu_preempt_note_context_switch(); } -static inline void synchronize_rcu_expedited(void) +/* + * Return the number of grace periods. + */ +static inline long rcu_batches_completed(void) { - synchronize_sched(); + return 0; } -static inline void synchronize_rcu_bh_expedited(void) +/* + * Return the number of bottom-half grace periods. + */ +static inline long rcu_batches_completed_bh(void) { - synchronize_sched(); + return 0; } -struct notifier_block; - -#ifdef CONFIG_NO_HZ - -extern void rcu_enter_nohz(void); -extern void rcu_exit_nohz(void); - -#else /* #ifdef CONFIG_NO_HZ */ - -static inline void rcu_enter_nohz(void) +static inline void rcu_force_quiescent_state(void) { } -static inline void rcu_exit_nohz(void) +static inline void rcu_bh_force_quiescent_state(void) { } -#endif /* #else #ifdef CONFIG_NO_HZ */ - -static inline void exit_rcu(void) +static inline void rcu_sched_force_quiescent_state(void) { } -static inline int rcu_preempt_depth(void) +static inline void rcu_cpu_stall_reset(void) { - return 0; } #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index c0ed1c0..95518e6 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,64 +30,23 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -struct notifier_block; - -extern void rcu_sched_qs(int cpu); -extern void rcu_bh_qs(int cpu); extern void rcu_note_context_switch(int cpu); extern int rcu_needs_cpu(int cpu); +extern void rcu_cpu_stall_reset(void); #ifdef CONFIG_TREE_PREEMPT_RCU -extern void __rcu_read_lock(void); -extern void __rcu_read_unlock(void); -extern void synchronize_rcu(void); extern void exit_rcu(void); -/* - * Defined as macro as it is a very low level header - * included from areas that don't even know about current - */ -#define rcu_preempt_depth() (current->rcu_read_lock_nesting) - #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ -static inline void __rcu_read_lock(void) -{ - preempt_disable(); -} - -static inline void __rcu_read_unlock(void) -{ - preempt_enable(); -} - -#define synchronize_rcu synchronize_sched - static inline void exit_rcu(void) { } -static inline int rcu_preempt_depth(void) -{ - return 0; -} - #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */ -static inline void __rcu_read_lock_bh(void) -{ - local_bh_disable(); -} -static inline void __rcu_read_unlock_bh(void) -{ - local_bh_enable(); -} - -extern void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); extern void synchronize_rcu_bh(void); -extern void synchronize_sched(void); extern void synchronize_rcu_expedited(void); static inline void synchronize_rcu_bh_expedited(void) @@ -95,7 +54,7 @@ static inline void synchronize_rcu_bh_expedited(void) synchronize_sched_expedited(); } -extern void rcu_check_callbacks(int cpu, int user); +extern void rcu_barrier(void); extern long rcu_batches_completed(void); extern long rcu_batches_completed_bh(void); @@ -104,18 +63,6 @@ extern void rcu_force_quiescent_state(void); extern void rcu_bh_force_quiescent_state(void); extern void rcu_sched_force_quiescent_state(void); -#ifdef CONFIG_NO_HZ -void rcu_enter_nohz(void); -void rcu_exit_nohz(void); -#else /* CONFIG_NO_HZ */ -static inline void rcu_enter_nohz(void) -{ -} -static inline void rcu_exit_nohz(void) -{ -} -#endif /* CONFIG_NO_HZ */ - /* A context switch is a grace period for RCU-sched and RCU-bh. */ static inline int rcu_blocking_is_gp(void) { diff --git a/include/linux/sched.h b/include/linux/sched.h index ce160d6..e18473f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1202,11 +1202,13 @@ struct task_struct { unsigned int policy; cpumask_t cpus_allowed; -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; char rcu_read_unlock_special; - struct rcu_node *rcu_blocked_node; struct list_head rcu_node_entry; +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TREE_PREEMPT_RCU + struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) @@ -1288,9 +1290,9 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - const struct cred *real_cred; /* objective and real subjective task + const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ - const struct cred *cred; /* effective (overridable) subjective task + const struct cred __rcu *cred; /* effective (overridable) subjective task * credentials (COW) */ struct mutex cred_guard_mutex; /* guard against foreign influences on * credential calculations @@ -1418,7 +1420,7 @@ struct task_struct { #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock */ - struct css_set *cgroups; + struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif @@ -1740,7 +1742,7 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t * #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -#ifdef CONFIG_TREE_PREEMPT_RCU +#ifdef CONFIG_PREEMPT_RCU #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ @@ -1749,7 +1751,9 @@ static inline void rcu_copy_process(struct task_struct *p) { p->rcu_read_lock_nesting = 0; p->rcu_read_unlock_special = 0; +#ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; +#endif INIT_LIST_HEAD(&p->rcu_node_entry); } @@ -2109,7 +2113,9 @@ extern void daemonize(const char *, ...); extern int allow_signal(int); extern int disallow_signal(int); -extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *); +extern int do_execve(const char *, + const char __user * const __user *, + const char __user * const __user *, struct pt_regs *); extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index ae0a528..92e52a1 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -213,6 +213,9 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @dma_alignment: SPI controller constraint on DMA buffers alignment. * @mode_bits: flags understood by this controller driver * @flags: other constraints relevant to this driver + * @bus_lock_spinlock: spinlock for SPI bus locking + * @bus_lock_mutex: mutex for SPI bus locking + * @bus_lock_flag: indicates that the SPI bus is locked for exclusive use * @setup: updates the device mode and clocking records used by a * device's SPI controller; protocol code may call this. This * must fail if an unrecognized or unsupported mode is requested. diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 4d5d2f5..58971e8 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -108,19 +108,43 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ /** - * srcu_dereference - fetch SRCU-protected pointer with checking + * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * @c: condition to check for update-side use * - * Makes rcu_dereference_check() do the dirty work. + * If PROVE_RCU is enabled, invoking this outside of an RCU read-side + * critical section will result in an RCU-lockdep splat, unless @c evaluates + * to 1. The @c argument will normally be a logical expression containing + * lockdep_is_held() calls. */ -#define srcu_dereference(p, sp) \ - rcu_dereference_check(p, srcu_read_lock_held(sp)) +#define srcu_dereference_check(p, sp, c) \ + __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) + +/** + * srcu_dereference - fetch SRCU-protected pointer for later dereferencing + * @p: the pointer to fetch and protect for later dereferencing + * @sp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. + * + * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU + * is enabled, invoking this outside of an RCU read-side critical + * section will result in an RCU-lockdep splat. + */ +#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) /** * srcu_read_lock - register a new reader for an SRCU-protected structure. * @sp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section. Note that SRCU read-side - * critical sections may be nested. + * critical sections may be nested. However, it is illegal to + * call anything that waits on an SRCU grace period for the same + * srcu_struct, whether directly or indirectly. Please note that + * one way to indirectly wait on an SRCU grace period is to acquire + * a mutex that is held elsewhere while calling synchronize_srcu() or + * synchronize_srcu_expedited(). */ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index 671538d..8eee9db 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -69,7 +69,7 @@ struct gss_cl_ctx { enum rpc_gss_proc gc_proc; u32 gc_seq; spinlock_t gc_seq_lock; - struct gss_ctx *gc_gss_ctx; + struct gss_ctx __rcu *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; u32 gc_win; unsigned long gc_expiry; @@ -80,7 +80,7 @@ struct gss_upcall_msg; struct gss_cred { struct rpc_cred gc_base; enum rpc_gss_svc gc_service; - struct gss_cl_ctx *gc_ctx; + struct gss_cl_ctx __rcu *gc_ctx; struct gss_upcall_msg *gc_upcall; unsigned long gc_upcall_timestamp; unsigned char gc_machine_cred : 1; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 6e5d197..e6319d1 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -820,7 +820,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, u64 mask, int fd, const char __user *pathname); -int kernel_execve(const char *filename, char *const argv[], char *const envp[]); +int kernel_execve(const char *filename, const char *const argv[], const char *const envp[]); asmlinkage long sys_perf_event_open( diff --git a/include/linux/tty.h b/include/linux/tty.h index 1437da3..67d64e6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -329,6 +329,13 @@ struct tty_struct { struct tty_port *port; }; +/* Each of a tty's open files has private_data pointing to tty_file_private */ +struct tty_file_private { + struct tty_struct *tty; + struct file *file; + struct list_head list; +}; + /* tty magic number */ #define TTY_MAGIC 0x5401 @@ -458,6 +465,7 @@ extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); extern void tty_default_fops(struct file_operations *fops); extern struct tty_struct *alloc_tty_struct(void); +extern void tty_add_file(struct tty_struct *tty, struct file *file); extern void free_tty_struct(struct tty_struct *tty); extern void initialize_tty_struct(struct tty_struct *tty, struct tty_driver *driver, int idx); @@ -470,6 +478,7 @@ extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty); extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty); extern struct mutex tty_mutex; +extern spinlock_t tty_files_lock; extern void tty_write_unlock(struct tty_struct *tty); extern int tty_write_lock(struct tty_struct *tty, int ndelay); diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index 726cc35..dd1fdb8 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -45,7 +45,8 @@ static inline u32 task_cls_classid(struct task_struct *p) return 0; rcu_read_lock(); - id = rcu_dereference(net_cls_subsys_id); + id = rcu_dereference_index_check(net_cls_subsys_id, + rcu_read_lock_held()); if (id >= 0) classid = container_of(task_subsys_state(p, id), struct cgroup_cls_state, css)->classid; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e624dae..caf17db 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -75,7 +75,7 @@ struct nf_conntrack_helper; /* nf_conn feature for connections that have a helper */ struct nf_conn_help { /* Helper. if any */ - struct nf_conntrack_helper *helper; + struct nf_conntrack_helper __rcu *helper; union nf_conntrack_help help; diff --git a/include/sound/emu10k1.h b/include/sound/emu10k1.h index 6a664c3..7dc97d1 100644 --- a/include/sound/emu10k1.h +++ b/include/sound/emu10k1.h @@ -1707,6 +1707,7 @@ struct snd_emu10k1 { unsigned int card_type; /* EMU10K1_CARD_* */ unsigned int ecard_ctrl; /* ecard control bits */ unsigned long dma_mask; /* PCI DMA mask */ + unsigned int delay_pcm_irq; /* in samples */ int max_cache_pages; /* max memory size / PAGE_SIZE */ struct snd_dma_buffer silent_page; /* silent page */ struct snd_dma_buffer ptb_pages; /* page table pages */ diff --git a/init/Kconfig b/init/Kconfig index 2de5b1c..a619a1a 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -340,6 +340,7 @@ choice config TREE_RCU bool "Tree-based hierarchical RCU" + depends on !PREEMPT && SMP help This option selects the RCU implementation that is designed for very large SMP system with hundreds or @@ -347,7 +348,7 @@ config TREE_RCU smaller systems. config TREE_PREEMPT_RCU - bool "Preemptable tree-based hierarchical RCU" + bool "Preemptible tree-based hierarchical RCU" depends on PREEMPT help This option selects the RCU implementation that is @@ -365,8 +366,22 @@ config TINY_RCU is not required. This option greatly reduces the memory footprint of RCU. +config TINY_PREEMPT_RCU + bool "Preemptible UP-only small-memory-footprint RCU" + depends on !SMP && PREEMPT + help + This option selects the RCU implementation that is designed + for real-time UP systems. This option greatly reduces the + memory footprint of RCU. + endchoice +config PREEMPT_RCU + def_bool ( TREE_PREEMPT_RCU || TINY_PREEMPT_RCU ) + help + This option enables preemptible-RCU code that is common between + the TREE_PREEMPT_RCU and TINY_PREEMPT_RCU implementations. + config RCU_TRACE bool "Enable tracing for RCU" depends on TREE_RCU || TREE_PREEMPT_RCU @@ -387,9 +402,12 @@ config RCU_FANOUT help This option controls the fanout of hierarchical implementations of RCU, allowing RCU to work efficiently on machines with - large numbers of CPUs. This value must be at least the cube - root of NR_CPUS, which allows NR_CPUS up to 32,768 for 32-bit - systems and up to 262,144 for 64-bit systems. + large numbers of CPUs. This value must be at least the fourth + root of NR_CPUS, which allows NR_CPUS to be insanely large. + The default value of RCU_FANOUT should be used for production + systems, but if you are stress-testing the RCU implementation + itself, small RCU_FANOUT values allow you to test large-system + code paths on small(er) systems. Select a specific number if testing RCU itself. Take the default if unsure. diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c index 2b10853..3098a38 100644 --- a/init/do_mounts_initrd.c +++ b/init/do_mounts_initrd.c @@ -24,10 +24,11 @@ static int __init no_initrd(char *str) __setup("noinitrd", no_initrd); -static int __init do_linuxrc(void * shell) +static int __init do_linuxrc(void *_shell) { - static char *argv[] = { "linuxrc", NULL, }; - extern char * envp_init[]; + static const char *argv[] = { "linuxrc", NULL, }; + extern const char *envp_init[]; + const char *shell = _shell; sys_close(old_fd);sys_close(root_fd); sys_setsid(); diff --git a/init/main.c b/init/main.c index 22d61cb..94ab488 100644 --- a/init/main.c +++ b/init/main.c @@ -197,8 +197,8 @@ static int __init set_reset_devices(char *str) __setup("reset_devices", set_reset_devices); -static char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; -char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; +static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, }; +const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, }; static const char *panic_later, *panic_param; extern const struct obs_kernel_param __setup_start[], __setup_end[]; @@ -809,7 +809,7 @@ static void __init do_pre_smp_initcalls(void) do_one_initcall(*fn); } -static void run_init_process(char *init_filename) +static void run_init_process(const char *init_filename) { argv_init[0] = init_filename; kernel_execve(init_filename, argv_init, envp_init); diff --git a/kernel/Makefile b/kernel/Makefile index 0b72d1a..17046b6 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_TREE_RCU) += rcutree.o obj-$(CONFIG_TREE_PREEMPT_RCU) += rcutree.o obj-$(CONFIG_TREE_RCU_TRACE) += rcutree_trace.o obj-$(CONFIG_TINY_RCU) += rcutiny.o +obj-$(CONFIG_TINY_PREEMPT_RCU) += rcutiny.o obj-$(CONFIG_RELAY) += relay.o obj-$(CONFIG_SYSCTL) += utsname_sysctl.o obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 192f88c..e5c5497 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -138,7 +138,7 @@ struct css_id { * is called after synchronize_rcu(). But for safe use, css_is_removed() * css_tryget() should be used for avoiding race. */ - struct cgroup_subsys_state *css; + struct cgroup_subsys_state __rcu *css; /* * ID of this css. */ diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index c438f54..be775f7 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -255,7 +255,14 @@ extern void kdb_ps1(const struct task_struct *p); extern void kdb_print_nameval(const char *name, unsigned long val); extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info); extern void kdb_meminfo_proc_show(void); +#ifdef CONFIG_KALLSYMS extern const char *kdb_walk_kallsyms(loff_t *pos); +#else /* ! CONFIG_KALLSYMS */ +static inline const char *kdb_walk_kallsyms(loff_t *pos) +{ + return NULL; +} +#endif /* ! CONFIG_KALLSYMS */ extern char *kdb_getstr(char *, size_t, char *); /* Defines for kdb_symbol_print */ diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c index 45344d5..6b2485d 100644 --- a/kernel/debug/kdb/kdb_support.c +++ b/kernel/debug/kdb/kdb_support.c @@ -82,8 +82,8 @@ static char *kdb_name_table[100]; /* arbitrary size */ int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab) { int ret = 0; - unsigned long symbolsize; - unsigned long offset; + unsigned long symbolsize = 0; + unsigned long offset = 0; #define knt1_size 128 /* must be >= kallsyms table size */ char *knt1 = NULL; diff --git a/kernel/exit.c b/kernel/exit.c index 671ed56..0312022 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1386,8 +1386,7 @@ static int wait_task_stopped(struct wait_opts *wo, if (!unlikely(wo->wo_flags & WNOWAIT)) *p_code = 0; - /* don't need the RCU readlock here as we're holding a spinlock */ - uid = __task_cred(p)->uid; + uid = task_uid(p); unlock_sig: spin_unlock_irq(&p->sighand->siglock); if (!exit_code) @@ -1460,7 +1459,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) } if (!unlikely(wo->wo_flags & WNOWAIT)) p->signal->flags &= ~SIGNAL_STOP_CONTINUED; - uid = __task_cred(p)->uid; + uid = task_uid(p); spin_unlock_irq(&p->sighand->siglock); pid = task_pid_vnr(p); diff --git a/kernel/fork.c b/kernel/fork.c index 98b4508..856eac3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -752,13 +752,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) struct fs_struct *fs = current->fs; if (clone_flags & CLONE_FS) { /* tsk->fs is already what we want */ - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->in_exec) { - write_unlock(&fs->lock); + spin_unlock(&fs->lock); return -EAGAIN; } fs->users++; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); return 0; } tsk->fs = copy_fs_struct(fs); @@ -1676,13 +1676,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) if (new_fs) { fs = current->fs; - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = new_fs; if (--fs->users) new_fs = NULL; else new_fs = fs; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } if (new_mm) { diff --git a/kernel/kmod.c b/kernel/kmod.c index 6e9b196..9cd0591 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -153,7 +153,9 @@ static int ____call_usermodehelper(void *data) goto fail; } - retval = kernel_execve(sub_info->path, sub_info->argv, sub_info->envp); + retval = kernel_execve(sub_info->path, + (const char *const *)sub_info->argv, + (const char *const *)sub_info->envp); /* Exec failed? */ fail: diff --git a/kernel/pid.c b/kernel/pid.c index d55c6fb..39b65b6 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -401,7 +401,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) struct task_struct *result = NULL; if (pid) { struct hlist_node *first; - first = rcu_dereference_check(pid->tasks[type].first, + first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), rcu_read_lock_held() || lockdep_tasklist_lock_is_held()); if (first) @@ -416,6 +416,7 @@ EXPORT_SYMBOL(pid_task); */ struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) { + rcu_lockdep_assert(rcu_read_lock_held()); return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); } diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index 4d16983..6c79e85 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -73,12 +73,14 @@ int debug_lockdep_rcu_enabled(void) EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); /** - * rcu_read_lock_bh_held - might we be in RCU-bh read-side critical section? + * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? * * Check for bottom half being disabled, which covers both the * CONFIG_PROVE_RCU and not cases. Note that if someone uses * rcu_read_lock_bh(), but then later enables BH, lockdep (if enabled) - * will show the situation. + * will show the situation. This is useful for debug checks in functions + * that require that they be called within an RCU read-side critical + * section. * * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. */ diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c index 196ec02..d806735 100644 --- a/kernel/rcutiny.c +++ b/kernel/rcutiny.c @@ -59,6 +59,14 @@ int rcu_scheduler_active __read_mostly; EXPORT_SYMBOL_GPL(rcu_scheduler_active); #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +/* Forward declarations for rcutiny_plugin.h. */ +static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp); +static void __call_rcu(struct rcu_head *head, + void (*func)(struct rcu_head *rcu), + struct rcu_ctrlblk *rcp); + +#include "rcutiny_plugin.h" + #ifdef CONFIG_NO_HZ static long rcu_dynticks_nesting = 1; @@ -140,6 +148,7 @@ void rcu_check_callbacks(int cpu, int user) rcu_sched_qs(cpu); else if (!in_softirq()) rcu_bh_qs(cpu); + rcu_preempt_check_callbacks(); } /* @@ -162,6 +171,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) *rcp->donetail = NULL; if (rcp->curtail == rcp->donetail) rcp->curtail = &rcp->rcucblist; + rcu_preempt_remove_callbacks(rcp); rcp->donetail = &rcp->rcucblist; local_irq_restore(flags); @@ -182,6 +192,7 @@ static void rcu_process_callbacks(struct softirq_action *unused) { __rcu_process_callbacks(&rcu_sched_ctrlblk); __rcu_process_callbacks(&rcu_bh_ctrlblk); + rcu_preempt_process_callbacks(); } /* @@ -223,15 +234,15 @@ static void __call_rcu(struct rcu_head *head, } /* - * Post an RCU callback to be invoked after the end of an RCU grace + * Post an RCU callback to be invoked after the end of an RCU-sched grace * period. But since we have but one CPU, that would be after any * quiescent state. */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) { __call_rcu(head, func, &rcu_sched_ctrlblk); } -EXPORT_SYMBOL_GPL(call_rcu); +EXPORT_SYMBOL_GPL(call_rcu_sched); /* * Post an RCU bottom-half callback to be invoked after any subsequent @@ -243,20 +254,6 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) } EXPORT_SYMBOL_GPL(call_rcu_bh); -void rcu_barrier(void) -{ - struct rcu_synchronize rcu; - - init_rcu_head_on_stack(&rcu.head); - init_completion(&rcu.completion); - /* Will wake me after RCU finished. */ - call_rcu(&rcu.head, wakeme_after_rcu); - /* Wait for it. */ - wait_for_completion(&rcu.completion); - destroy_rcu_head_on_stack(&rcu.head); -} -EXPORT_SYMBOL_GPL(rcu_barrier); - void rcu_barrier_bh(void) { struct rcu_synchronize rcu; @@ -289,5 +286,3 @@ void __init rcu_init(void) { open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); } - -#include "rcutiny_plugin.h" diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index d223a92..c5bea11 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h @@ -1,7 +1,7 @@ /* - * Read-Copy Update mechanism for mutual exclusion (tree-based version) + * Read-Copy Update mechanism for mutual exclusion, the Bloatwatch edition * Internal non-public definitions that provide either classic - * or preemptable semantics. + * or preemptible semantics. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -17,11 +17,583 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * Copyright IBM Corporation, 2009 + * Copyright (c) 2010 Linaro * * Author: Paul E. McKenney */ +#ifdef CONFIG_TINY_PREEMPT_RCU + +#include + +/* Global control variables for preemptible RCU. */ +struct rcu_preempt_ctrlblk { + struct rcu_ctrlblk rcb; /* curtail: ->next ptr of last CB for GP. */ + struct rcu_head **nexttail; + /* Tasks blocked in a preemptible RCU */ + /* read-side critical section while an */ + /* preemptible-RCU grace period is in */ + /* progress must wait for a later grace */ + /* period. This pointer points to the */ + /* ->next pointer of the last task that */ + /* must wait for a later grace period, or */ + /* to &->rcb.rcucblist if there is no */ + /* such task. */ + struct list_head blkd_tasks; + /* Tasks blocked in RCU read-side critical */ + /* section. Tasks are placed at the head */ + /* of this list and age towards the tail. */ + struct list_head *gp_tasks; + /* Pointer to the first task blocking the */ + /* current grace period, or NULL if there */ + /* is not such task. */ + struct list_head *exp_tasks; + /* Pointer to first task blocking the */ + /* current expedited grace period, or NULL */ + /* if there is no such task. If there */ + /* is no current expedited grace period, */ + /* then there cannot be any such task. */ + u8 gpnum; /* Current grace period. */ + u8 gpcpu; /* Last grace period blocked by the CPU. */ + u8 completed; /* Last grace period completed. */ + /* If all three are equal, RCU is idle. */ +}; + +static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = { + .rcb.donetail = &rcu_preempt_ctrlblk.rcb.rcucblist, + .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, + .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, + .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), +}; + +static int rcu_preempted_readers_exp(void); +static void rcu_report_exp_done(void); + +/* + * Return true if the CPU has not yet responded to the current grace period. + */ +static int rcu_cpu_cur_gp(void) +{ + return rcu_preempt_ctrlblk.gpcpu != rcu_preempt_ctrlblk.gpnum; +} + +/* + * Check for a running RCU reader. Because there is only one CPU, + * there can be but one running RCU reader at a time. ;-) + */ +static int rcu_preempt_running_reader(void) +{ + return current->rcu_read_lock_nesting; +} + +/* + * Check for preempted RCU readers blocking any grace period. + * If the caller needs a reliable answer, it must disable hard irqs. + */ +static int rcu_preempt_blocked_readers_any(void) +{ + return !list_empty(&rcu_preempt_ctrlblk.blkd_tasks); +} + +/* + * Check for preempted RCU readers blocking the current grace period. + * If the caller needs a reliable answer, it must disable hard irqs. + */ +static int rcu_preempt_blocked_readers_cgp(void) +{ + return rcu_preempt_ctrlblk.gp_tasks != NULL; +} + +/* + * Return true if another preemptible-RCU grace period is needed. + */ +static int rcu_preempt_needs_another_gp(void) +{ + return *rcu_preempt_ctrlblk.rcb.curtail != NULL; +} + +/* + * Return true if a preemptible-RCU grace period is in progress. + * The caller must disable hardirqs. + */ +static int rcu_preempt_gp_in_progress(void) +{ + return rcu_preempt_ctrlblk.completed != rcu_preempt_ctrlblk.gpnum; +} + +/* + * Record a preemptible-RCU quiescent state for the specified CPU. Note + * that this just means that the task currently running on the CPU is + * in a quiescent state. There might be any number of tasks blocked + * while in an RCU read-side critical section. + * + * Unlike the other rcu_*_qs() functions, callers to this function + * must disable irqs in order to protect the assignment to + * ->rcu_read_unlock_special. + * + * Because this is a single-CPU implementation, the only way a grace + * period can end is if the CPU is in a quiescent state. The reason is + * that a blocked preemptible-RCU reader can exit its critical section + * only if the CPU is running it at the time. Therefore, when the + * last task blocking the current grace period exits its RCU read-side + * critical section, neither the CPU nor blocked tasks will be stopping + * the current grace period. (In contrast, SMP implementations + * might have CPUs running in RCU read-side critical sections that + * block later grace periods -- but this is not possible given only + * one CPU.) + */ +static void rcu_preempt_cpu_qs(void) +{ + /* Record both CPU and task as having responded to current GP. */ + rcu_preempt_ctrlblk.gpcpu = rcu_preempt_ctrlblk.gpnum; + current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + + /* + * If there is no GP, or if blocked readers are still blocking GP, + * then there is nothing more to do. + */ + if (!rcu_preempt_gp_in_progress() || rcu_preempt_blocked_readers_cgp()) + return; + + /* Advance callbacks. */ + rcu_preempt_ctrlblk.completed = rcu_preempt_ctrlblk.gpnum; + rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.rcb.curtail; + rcu_preempt_ctrlblk.rcb.curtail = rcu_preempt_ctrlblk.nexttail; + + /* If there are no blocked readers, next GP is done instantly. */ + if (!rcu_preempt_blocked_readers_any()) + rcu_preempt_ctrlblk.rcb.donetail = rcu_preempt_ctrlblk.nexttail; + + /* If there are done callbacks, make RCU_SOFTIRQ process them. */ + if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) + raise_softirq(RCU_SOFTIRQ); +} + +/* + * Start a new RCU grace period if warranted. Hard irqs must be disabled. + */ +static void rcu_preempt_start_gp(void) +{ + if (!rcu_preempt_gp_in_progress() && rcu_preempt_needs_another_gp()) { + + /* Official start of GP. */ + rcu_preempt_ctrlblk.gpnum++; + + /* Any blocked RCU readers block new GP. */ + if (rcu_preempt_blocked_readers_any()) + rcu_preempt_ctrlblk.gp_tasks = + rcu_preempt_ctrlblk.blkd_tasks.next; + + /* If there is no running reader, CPU is done with GP. */ + if (!rcu_preempt_running_reader()) + rcu_preempt_cpu_qs(); + } +} + +/* + * We have entered the scheduler, and the current task might soon be + * context-switched away from. If this task is in an RCU read-side + * critical section, we will no longer be able to rely on the CPU to + * record that fact, so we enqueue the task on the blkd_tasks list. + * If the task started after the current grace period began, as recorded + * by ->gpcpu, we enqueue at the beginning of the list. Otherwise + * before the element referenced by ->gp_tasks (or at the tail if + * ->gp_tasks is NULL) and point ->gp_tasks at the newly added element. + * The task will dequeue itself when it exits the outermost enclosing + * RCU read-side critical section. Therefore, the current grace period + * cannot be permitted to complete until the ->gp_tasks pointer becomes + * NULL. + * + * Caller must disable preemption. + */ +void rcu_preempt_note_context_switch(void) +{ + struct task_struct *t = current; + unsigned long flags; + + local_irq_save(flags); /* must exclude scheduler_tick(). */ + if (rcu_preempt_running_reader() && + (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { + + /* Possibly blocking in an RCU read-side critical section. */ + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; + + /* + * If this CPU has already checked in, then this task + * will hold up the next grace period rather than the + * current grace period. Queue the task accordingly. + * If the task is queued for the current grace period + * (i.e., this CPU has not yet passed through a quiescent + * state for the current grace period), then as long + * as that task remains queued, the current grace period + * cannot end. + */ + list_add(&t->rcu_node_entry, &rcu_preempt_ctrlblk.blkd_tasks); + if (rcu_cpu_cur_gp()) + rcu_preempt_ctrlblk.gp_tasks = &t->rcu_node_entry; + } + + /* + * Either we were not in an RCU read-side critical section to + * begin with, or we have now recorded that critical section + * globally. Either way, we can now note a quiescent state + * for this CPU. Again, if we were in an RCU read-side critical + * section, and if that critical section was blocking the current + * grace period, then the fact that the task has been enqueued + * means that current grace period continues to be blocked. + */ + rcu_preempt_cpu_qs(); + local_irq_restore(flags); +} + +/* + * Tiny-preemptible RCU implementation for rcu_read_lock(). + * Just increment ->rcu_read_lock_nesting, shared state will be updated + * if we block. + */ +void __rcu_read_lock(void) +{ + current->rcu_read_lock_nesting++; + barrier(); /* needed if we ever invoke rcu_read_lock in rcutiny.c */ +} +EXPORT_SYMBOL_GPL(__rcu_read_lock); + +/* + * Handle special cases during rcu_read_unlock(), such as needing to + * notify RCU core processing or task having blocked during the RCU + * read-side critical section. + */ +static void rcu_read_unlock_special(struct task_struct *t) +{ + int empty; + int empty_exp; + unsigned long flags; + struct list_head *np; + int special; + + /* + * NMI handlers cannot block and cannot safely manipulate state. + * They therefore cannot possibly be special, so just leave. + */ + if (in_nmi()) + return; + + local_irq_save(flags); + + /* + * If RCU core is waiting for this CPU to exit critical section, + * let it know that we have done so. + */ + special = t->rcu_read_unlock_special; + if (special & RCU_READ_UNLOCK_NEED_QS) + rcu_preempt_cpu_qs(); + + /* Hardware IRQ handlers cannot block. */ + if (in_irq()) { + local_irq_restore(flags); + return; + } + + /* Clean up if blocked during RCU read-side critical section. */ + if (special & RCU_READ_UNLOCK_BLOCKED) { + t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; + + /* + * Remove this task from the ->blkd_tasks list and adjust + * any pointers that might have been referencing it. + */ + empty = !rcu_preempt_blocked_readers_cgp(); + empty_exp = rcu_preempt_ctrlblk.exp_tasks == NULL; + np = t->rcu_node_entry.next; + if (np == &rcu_preempt_ctrlblk.blkd_tasks) + np = NULL; + list_del(&t->rcu_node_entry); + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.gp_tasks) + rcu_preempt_ctrlblk.gp_tasks = np; + if (&t->rcu_node_entry == rcu_preempt_ctrlblk.exp_tasks) + rcu_preempt_ctrlblk.exp_tasks = np; + INIT_LIST_HEAD(&t->rcu_node_entry); + + /* + * If this was the last task on the current list, and if + * we aren't waiting on the CPU, report the quiescent state + * and start a new grace period if needed. + */ + if (!empty && !rcu_preempt_blocked_readers_cgp()) { + rcu_preempt_cpu_qs(); + rcu_preempt_start_gp(); + } + + /* + * If this was the last task on the expedited lists, + * then we need wake up the waiting task. + */ + if (!empty_exp && rcu_preempt_ctrlblk.exp_tasks == NULL) + rcu_report_exp_done(); + } + local_irq_restore(flags); +} + +/* + * Tiny-preemptible RCU implementation for rcu_read_unlock(). + * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost + * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then + * invoke rcu_read_unlock_special() to clean up after a context switch + * in an RCU read-side critical section and other special cases. + */ +void __rcu_read_unlock(void) +{ + struct task_struct *t = current; + + barrier(); /* needed if we ever invoke rcu_read_unlock in rcutiny.c */ + --t->rcu_read_lock_nesting; + barrier(); /* decrement before load of ->rcu_read_unlock_special */ + if (t->rcu_read_lock_nesting == 0 && + unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + rcu_read_unlock_special(t); +#ifdef CONFIG_PROVE_LOCKING + WARN_ON_ONCE(t->rcu_read_lock_nesting < 0); +#endif /* #ifdef CONFIG_PROVE_LOCKING */ +} +EXPORT_SYMBOL_GPL(__rcu_read_unlock); + +/* + * Check for a quiescent state from the current CPU. When a task blocks, + * the task is recorded in the rcu_preempt_ctrlblk structure, which is + * checked elsewhere. This is called from the scheduling-clock interrupt. + * + * Caller must disable hard irqs. + */ +static void rcu_preempt_check_callbacks(void) +{ + struct task_struct *t = current; + + if (!rcu_preempt_running_reader() && rcu_preempt_gp_in_progress()) + rcu_preempt_cpu_qs(); + if (&rcu_preempt_ctrlblk.rcb.rcucblist != + rcu_preempt_ctrlblk.rcb.donetail) + raise_softirq(RCU_SOFTIRQ); + if (rcu_preempt_gp_in_progress() && rcu_preempt_running_reader()) + t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; +} + +/* + * TINY_PREEMPT_RCU has an extra callback-list tail pointer to + * update, so this is invoked from __rcu_process_callbacks() to + * handle that case. Of course, it is invoked for all flavors of + * RCU, but RCU callbacks can appear only on one of the lists, and + * neither ->nexttail nor ->donetail can possibly be NULL, so there + * is no need for an explicit check. + */ +static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) +{ + if (rcu_preempt_ctrlblk.nexttail == rcp->donetail) + rcu_preempt_ctrlblk.nexttail = &rcp->rcucblist; +} + +/* + * Process callbacks for preemptible RCU. + */ +static void rcu_preempt_process_callbacks(void) +{ + __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); +} + +/* + * Queue a preemptible -RCU callback for invocation after a grace period. + */ +void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) +{ + unsigned long flags; + + debug_rcu_head_queue(head); + head->func = func; + head->next = NULL; + + local_irq_save(flags); + *rcu_preempt_ctrlblk.nexttail = head; + rcu_preempt_ctrlblk.nexttail = &head->next; + rcu_preempt_start_gp(); /* checks to see if GP needed. */ + local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(call_rcu); + +void rcu_barrier(void) +{ + struct rcu_synchronize rcu; + + init_rcu_head_on_stack(&rcu.head); + init_completion(&rcu.completion); + /* Will wake me after RCU finished. */ + call_rcu(&rcu.head, wakeme_after_rcu); + /* Wait for it. */ + wait_for_completion(&rcu.completion); + destroy_rcu_head_on_stack(&rcu.head); +} +EXPORT_SYMBOL_GPL(rcu_barrier); + +/* + * synchronize_rcu - wait until a grace period has elapsed. + * + * Control will return to the caller some time after a full grace + * period has elapsed, in other words after all currently executing RCU + * read-side critical sections have completed. RCU read-side critical + * sections are delimited by rcu_read_lock() and rcu_read_unlock(), + * and may be nested. + */ +void synchronize_rcu(void) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + if (!rcu_scheduler_active) + return; +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ + + WARN_ON_ONCE(rcu_preempt_running_reader()); + if (!rcu_preempt_blocked_readers_any()) + return; + + /* Once we get past the fastpath checks, same code as rcu_barrier(). */ + rcu_barrier(); +} +EXPORT_SYMBOL_GPL(synchronize_rcu); + +static DECLARE_WAIT_QUEUE_HEAD(sync_rcu_preempt_exp_wq); +static unsigned long sync_rcu_preempt_exp_count; +static DEFINE_MUTEX(sync_rcu_preempt_exp_mutex); + +/* + * Return non-zero if there are any tasks in RCU read-side critical + * sections blocking the current preemptible-RCU expedited grace period. + * If there is no preemptible-RCU expedited grace period currently in + * progress, returns zero unconditionally. + */ +static int rcu_preempted_readers_exp(void) +{ + return rcu_preempt_ctrlblk.exp_tasks != NULL; +} + +/* + * Report the exit from RCU read-side critical section for the last task + * that queued itself during or before the current expedited preemptible-RCU + * grace period. + */ +static void rcu_report_exp_done(void) +{ + wake_up(&sync_rcu_preempt_exp_wq); +} + +/* + * Wait for an rcu-preempt grace period, but expedite it. The basic idea + * is to rely in the fact that there is but one CPU, and that it is + * illegal for a task to invoke synchronize_rcu_expedited() while in a + * preemptible-RCU read-side critical section. Therefore, any such + * critical sections must correspond to blocked tasks, which must therefore + * be on the ->blkd_tasks list. So just record the current head of the + * list in the ->exp_tasks pointer, and wait for all tasks including and + * after the task pointed to by ->exp_tasks to drain. + */ +void synchronize_rcu_expedited(void) +{ + unsigned long flags; + struct rcu_preempt_ctrlblk *rpcp = &rcu_preempt_ctrlblk; + unsigned long snap; + + barrier(); /* ensure prior action seen before grace period. */ + + WARN_ON_ONCE(rcu_preempt_running_reader()); + + /* + * Acquire lock so that there is only one preemptible RCU grace + * period in flight. Of course, if someone does the expedited + * grace period for us while we are acquiring the lock, just leave. + */ + snap = sync_rcu_preempt_exp_count + 1; + mutex_lock(&sync_rcu_preempt_exp_mutex); + if (ULONG_CMP_LT(snap, sync_rcu_preempt_exp_count)) + goto unlock_mb_ret; /* Others did our work for us. */ + + local_irq_save(flags); + + /* + * All RCU readers have to already be on blkd_tasks because + * we cannot legally be executing in an RCU read-side critical + * section. + */ + + /* Snapshot current head of ->blkd_tasks list. */ + rpcp->exp_tasks = rpcp->blkd_tasks.next; + if (rpcp->exp_tasks == &rpcp->blkd_tasks) + rpcp->exp_tasks = NULL; + local_irq_restore(flags); + + /* Wait for tail of ->blkd_tasks list to drain. */ + if (rcu_preempted_readers_exp()) + wait_event(sync_rcu_preempt_exp_wq, + !rcu_preempted_readers_exp()); + + /* Clean up and exit. */ + barrier(); /* ensure expedited GP seen before counter increment. */ + sync_rcu_preempt_exp_count++; +unlock_mb_ret: + mutex_unlock(&sync_rcu_preempt_exp_mutex); + barrier(); /* ensure subsequent action seen after grace period. */ +} +EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); + +/* + * Does preemptible RCU need the CPU to stay out of dynticks mode? + */ +int rcu_preempt_needs_cpu(void) +{ + if (!rcu_preempt_running_reader()) + rcu_preempt_cpu_qs(); + return rcu_preempt_ctrlblk.rcb.rcucblist != NULL; +} + +/* + * Check for a task exiting while in a preemptible -RCU read-side + * critical section, clean up if so. No need to issue warnings, + * as debug_check_no_locks_held() already does this if lockdep + * is enabled. + */ +void exit_rcu(void) +{ + struct task_struct *t = current; + + if (t->rcu_read_lock_nesting == 0) + return; + t->rcu_read_lock_nesting = 1; + rcu_read_unlock(); +} + +#else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ + +/* + * Because preemptible RCU does not exist, it never has any callbacks + * to check. + */ +static void rcu_preempt_check_callbacks(void) +{ +} + +/* + * Because preemptible RCU does not exist, it never has any callbacks + * to remove. + */ +static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp) +{ +} + +/* + * Because preemptible RCU does not exist, it never has any callbacks + * to process. + */ +static void rcu_preempt_process_callbacks(void) +{ +} + +#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ + #ifdef CONFIG_DEBUG_LOCK_ALLOC #include diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 2e2726d..7297102 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c @@ -303,6 +303,10 @@ static void rcu_read_delay(struct rcu_random_state *rrsp) mdelay(longdelay_ms); if (!(rcu_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) udelay(shortdelay_us); +#ifdef CONFIG_PREEMPT + if (!preempt_count() && !(rcu_random(rrsp) % (nrealreaders * 20000))) + preempt_schedule(); /* No QS if preempt_disable() in effect */ +#endif } static void rcu_torture_read_unlock(int idx) __releases(RCU) @@ -536,6 +540,8 @@ static void srcu_read_delay(struct rcu_random_state *rrsp) delay = rcu_random(rrsp) % (nrealreaders * 2 * longdelay * uspertick); if (!delay) schedule_timeout_interruptible(longdelay); + else + rcu_read_delay(rrsp); } static void srcu_torture_read_unlock(int idx) __releases(&srcu_ctl) diff --git a/kernel/rcutree.c b/kernel/rcutree.c index d5bc439..42140a8 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -143,6 +143,11 @@ module_param(blimit, int, 0); module_param(qhimark, int, 0); module_param(qlowmark, int, 0); +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR +int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT; +module_param(rcu_cpu_stall_suppress, int, 0644); +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ + static void force_quiescent_state(struct rcu_state *rsp, int relaxed); static int rcu_pending(int cpu); @@ -450,7 +455,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) #ifdef CONFIG_RCU_CPU_STALL_DETECTOR -int rcu_cpu_stall_panicking __read_mostly; +int rcu_cpu_stall_suppress __read_mostly; static void record_gp_stall_check_time(struct rcu_state *rsp) { @@ -482,8 +487,11 @@ static void print_other_cpu_stall(struct rcu_state *rsp) rcu_print_task_stall(rnp); raw_spin_unlock_irqrestore(&rnp->lock, flags); - /* OK, time to rat on our buddy... */ - + /* + * OK, time to rat on our buddy... + * See Documentation/RCU/stallwarn.txt for info on how to debug + * RCU CPU stall warnings. + */ printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {", rsp->name); rcu_for_each_leaf_node(rsp, rnp) { @@ -512,6 +520,11 @@ static void print_cpu_stall(struct rcu_state *rsp) unsigned long flags; struct rcu_node *rnp = rcu_get_root(rsp); + /* + * OK, time to rat on ourselves... + * See Documentation/RCU/stallwarn.txt for info on how to debug + * RCU CPU stall warnings. + */ printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", rsp->name, smp_processor_id(), jiffies - rsp->gp_start); trigger_all_cpu_backtrace(); @@ -530,7 +543,7 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) long delta; struct rcu_node *rnp; - if (rcu_cpu_stall_panicking) + if (rcu_cpu_stall_suppress) return; delta = jiffies - rsp->jiffies_stall; rnp = rdp->mynode; @@ -548,10 +561,26 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) static int rcu_panic(struct notifier_block *this, unsigned long ev, void *ptr) { - rcu_cpu_stall_panicking = 1; + rcu_cpu_stall_suppress = 1; return NOTIFY_DONE; } +/** + * rcu_cpu_stall_reset - prevent further stall warnings in current grace period + * + * Set the stall-warning timeout way off into the future, thus preventing + * any RCU CPU stall-warning messages from appearing in the current set of + * RCU grace periods. + * + * The caller must disable hard irqs. + */ +void rcu_cpu_stall_reset(void) +{ + rcu_sched_state.jiffies_stall = jiffies + ULONG_MAX / 2; + rcu_bh_state.jiffies_stall = jiffies + ULONG_MAX / 2; + rcu_preempt_stall_reset(); +} + static struct notifier_block rcu_panic_block = { .notifier_call = rcu_panic, }; @@ -571,6 +600,10 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) { } +void rcu_cpu_stall_reset(void) +{ +} + static void __init check_cpu_stall_init(void) { } @@ -712,7 +745,7 @@ static void rcu_start_gp(struct rcu_state *rsp, unsigned long flags) __releases(rcu_get_root(rsp)->lock) { - struct rcu_data *rdp = rsp->rda[smp_processor_id()]; + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); struct rcu_node *rnp = rcu_get_root(rsp); if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { @@ -960,7 +993,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp) { int i; - struct rcu_data *rdp = rsp->rda[smp_processor_id()]; + struct rcu_data *rdp = this_cpu_ptr(rsp->rda); if (rdp->nxtlist == NULL) return; /* irqs disabled, so comparison is stable. */ @@ -984,7 +1017,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp) struct rcu_data *rdp; raw_spin_lock_irqsave(&rsp->onofflock, flags); - rdp = rsp->rda[smp_processor_id()]; + rdp = this_cpu_ptr(rsp->rda); if (rsp->orphan_cbs_list == NULL) { raw_spin_unlock_irqrestore(&rsp->onofflock, flags); return; @@ -1007,7 +1040,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) unsigned long flags; unsigned long mask; int need_report = 0; - struct rcu_data *rdp = rsp->rda[cpu]; + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp; /* Exclude any attempts to start a new grace period. */ @@ -1226,7 +1259,8 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *)) cpu = rnp->grplo; bit = 1; for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { - if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) + if ((rnp->qsmask & bit) != 0 && + f(per_cpu_ptr(rsp->rda, cpu))) mask |= bit; } if (mask != 0) { @@ -1402,7 +1436,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), * a quiescent state betweentimes. */ local_irq_save(flags); - rdp = rsp->rda[smp_processor_id()]; + rdp = this_cpu_ptr(rsp->rda); rcu_process_gp_end(rsp, rdp); check_for_new_grace_period(rsp, rdp); @@ -1701,7 +1735,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) { unsigned long flags; int i; - struct rcu_data *rdp = rsp->rda[cpu]; + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ @@ -1729,7 +1763,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) { unsigned long flags; unsigned long mask; - struct rcu_data *rdp = rsp->rda[cpu]; + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rcu_get_root(rsp); /* Set up local state, ensuring consistent view of global state. */ @@ -1865,7 +1899,8 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp) /* * Helper function for rcu_init() that initializes one rcu_state structure. */ -static void __init rcu_init_one(struct rcu_state *rsp) +static void __init rcu_init_one(struct rcu_state *rsp, + struct rcu_data __percpu *rda) { static char *buf[] = { "rcu_node_level_0", "rcu_node_level_1", @@ -1918,37 +1953,23 @@ static void __init rcu_init_one(struct rcu_state *rsp) } } + rsp->rda = rda; rnp = rsp->level[NUM_RCU_LVLS - 1]; for_each_possible_cpu(i) { while (i > rnp->grphi) rnp++; - rsp->rda[i]->mynode = rnp; + per_cpu_ptr(rsp->rda, i)->mynode = rnp; rcu_boot_init_percpu_data(i, rsp); } } -/* - * Helper macro for __rcu_init() and __rcu_init_preempt(). To be used - * nowhere else! Assigns leaf node pointers into each CPU's rcu_data - * structure. - */ -#define RCU_INIT_FLAVOR(rsp, rcu_data) \ -do { \ - int i; \ - \ - for_each_possible_cpu(i) { \ - (rsp)->rda[i] = &per_cpu(rcu_data, i); \ - } \ - rcu_init_one(rsp); \ -} while (0) - void __init rcu_init(void) { int cpu; rcu_bootup_announce(); - RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); - RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); + rcu_init_one(&rcu_sched_state, &rcu_sched_data); + rcu_init_one(&rcu_bh_state, &rcu_bh_data); __rcu_init_preempt(); open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 14c040b..7918ba6 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -254,19 +254,23 @@ struct rcu_data { #define RCU_STALL_DELAY_DELTA 0 #endif -#define RCU_SECONDS_TILL_STALL_CHECK (10 * HZ + RCU_STALL_DELAY_DELTA) +#define RCU_SECONDS_TILL_STALL_CHECK (CONFIG_RCU_CPU_STALL_TIMEOUT * HZ + \ + RCU_STALL_DELAY_DELTA) /* for rsp->jiffies_stall */ -#define RCU_SECONDS_TILL_STALL_RECHECK (30 * HZ + RCU_STALL_DELAY_DELTA) +#define RCU_SECONDS_TILL_STALL_RECHECK (3 * RCU_SECONDS_TILL_STALL_CHECK + 30) /* for rsp->jiffies_stall */ #define RCU_STALL_RAT_DELAY 2 /* Allow other CPUs time */ /* to take at least one */ /* scheduling clock irq */ /* before ratting on them. */ -#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ +#ifdef CONFIG_RCU_CPU_STALL_DETECTOR_RUNNABLE +#define RCU_CPU_STALL_SUPPRESS_INIT 0 +#else +#define RCU_CPU_STALL_SUPPRESS_INIT 1 +#endif -#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) -#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) +#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* * RCU global state, including node hierarchy. This hierarchy is @@ -283,7 +287,7 @@ struct rcu_state { struct rcu_node *level[NUM_RCU_LVLS]; /* Hierarchy levels. */ u32 levelcnt[MAX_RCU_LVLS + 1]; /* # nodes in each level. */ u8 levelspread[NUM_RCU_LVLS]; /* kids/node in each level. */ - struct rcu_data *rda[NR_CPUS]; /* array of rdp pointers. */ + struct rcu_data __percpu *rda; /* pointer of percu rcu_data. */ /* The following fields are guarded by the root rcu_node's lock. */ @@ -365,6 +369,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, #ifdef CONFIG_RCU_CPU_STALL_DETECTOR static void rcu_print_detail_task_stall(struct rcu_state *rsp); static void rcu_print_task_stall(struct rcu_node *rnp); +static void rcu_preempt_stall_reset(void); #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 0e4f420..e9e0bc7 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -154,7 +154,7 @@ static void rcu_preempt_note_context_switch(int cpu) (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { /* Possibly blocking in an RCU read-side critical section. */ - rdp = rcu_preempt_state.rda[cpu]; + rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; @@ -201,7 +201,7 @@ static void rcu_preempt_note_context_switch(int cpu) */ void __rcu_read_lock(void) { - ACCESS_ONCE(current->rcu_read_lock_nesting)++; + current->rcu_read_lock_nesting++; barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */ } EXPORT_SYMBOL_GPL(__rcu_read_lock); @@ -344,7 +344,9 @@ void __rcu_read_unlock(void) struct task_struct *t = current; barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */ - if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 && + --t->rcu_read_lock_nesting; + barrier(); /* decrement before load of ->rcu_read_unlock_special */ + if (t->rcu_read_lock_nesting == 0 && unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) rcu_read_unlock_special(t); #ifdef CONFIG_PROVE_LOCKING @@ -417,6 +419,16 @@ static void rcu_print_task_stall(struct rcu_node *rnp) } } +/* + * Suppress preemptible RCU's CPU stall warnings by pushing the + * time of the next stall-warning message comfortably far into the + * future. + */ +static void rcu_preempt_stall_reset(void) +{ + rcu_preempt_state.jiffies_stall = jiffies + ULONG_MAX / 2; +} + #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* @@ -546,9 +558,11 @@ EXPORT_SYMBOL_GPL(call_rcu); * * Control will return to the caller some time after a full grace * period has elapsed, in other words after all currently executing RCU - * read-side critical sections have completed. RCU read-side critical - * sections are delimited by rcu_read_lock() and rcu_read_unlock(), - * and may be nested. + * read-side critical sections have completed. Note, however, that + * upon return from synchronize_rcu(), the caller might well be executing + * concurrently with new RCU read-side critical sections that began while + * synchronize_rcu() was waiting. RCU read-side critical sections are + * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. */ void synchronize_rcu(void) { @@ -771,7 +785,7 @@ static void rcu_preempt_send_cbs_to_orphanage(void) */ static void __init __rcu_init_preempt(void) { - RCU_INIT_FLAVOR(&rcu_preempt_state, rcu_preempt_data); + rcu_init_one(&rcu_preempt_state, &rcu_preempt_data); } /* @@ -865,6 +879,14 @@ static void rcu_print_task_stall(struct rcu_node *rnp) { } +/* + * Because preemptible RCU does not exist, there is no need to suppress + * its CPU stall warnings. + */ +static void rcu_preempt_stall_reset(void) +{ +} + #endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ /* @@ -919,15 +941,6 @@ static void rcu_preempt_process_callbacks(void) } /* - * In classic RCU, call_rcu() is just call_rcu_sched(). - */ -void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) -{ - call_rcu_sched(head, func); -} -EXPORT_SYMBOL_GPL(call_rcu); - -/* * Wait for an rcu-preempt grace period, but make it happen quickly. * But because preemptable RCU does not exist, map to rcu-sched. */ diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 36c95b4..458e032 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -262,7 +262,7 @@ static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp) struct rcu_data *rdp; for_each_possible_cpu(cpu) { - rdp = rsp->rda[cpu]; + rdp = per_cpu_ptr(rsp->rda, cpu); if (rdp->beenonline) print_one_rcu_pending(m, rdp); } diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3632ce8..19cccc3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -3846,6 +3846,9 @@ int ring_buffer_read_page(struct ring_buffer *buffer, rpos = reader->read; pos += size; + if (rpos >= commit) + break; + event = rb_reader_event(cpu_buffer); size = rb_event_length(event); } while (len > size); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ba14a22..9ec59f5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3463,6 +3463,7 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { char *buf; + size_t written; if (tracing_disabled) return -EINVAL; @@ -3484,11 +3485,15 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, } else buf[cnt] = '\0'; - cnt = mark_printk("%s", buf); + written = mark_printk("%s", buf); kfree(buf); - *fpos += cnt; + *fpos += written; - return cnt; + /* don't tell userspace we wrote more - it might confuse them */ + if (written > cnt) + written = cnt; + + return written; } static int tracing_clock_show(struct seq_file *m, void *v) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 09b4fa6..4c758f1 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -598,88 +598,165 @@ out: return ret; } -static void print_event_fields(struct trace_seq *s, struct list_head *head) +enum { + FORMAT_HEADER = 1, + FORMAT_PRINTFMT = 2, +}; + +static void *f_next(struct seq_file *m, void *v, loff_t *pos) { + struct ftrace_event_call *call = m->private; struct ftrace_event_field *field; + struct list_head *head; - list_for_each_entry_reverse(field, head, link) { - /* - * Smartly shows the array type(except dynamic array). - * Normal: - * field:TYPE VAR - * If TYPE := TYPE[LEN], it is shown: - * field:TYPE VAR[LEN] - */ - const char *array_descriptor = strchr(field->type, '['); + (*pos)++; - if (!strncmp(field->type, "__data_loc", 10)) - array_descriptor = NULL; + switch ((unsigned long)v) { + case FORMAT_HEADER: + head = &ftrace_common_fields; - if (!array_descriptor) { - trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;" - "\tsize:%u;\tsigned:%d;\n", - field->type, field->name, field->offset, - field->size, !!field->is_signed); - } else { - trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;" - "\tsize:%u;\tsigned:%d;\n", - (int)(array_descriptor - field->type), - field->type, field->name, - array_descriptor, field->offset, - field->size, !!field->is_signed); - } + if (unlikely(list_empty(head))) + return NULL; + + field = list_entry(head->prev, struct ftrace_event_field, link); + return field; + + case FORMAT_PRINTFMT: + /* all done */ + return NULL; + } + + head = trace_get_fields(call); + + /* + * To separate common fields from event fields, the + * LSB is set on the first event field. Clear it in case. + */ + v = (void *)((unsigned long)v & ~1L); + + field = v; + /* + * If this is a common field, and at the end of the list, then + * continue with main list. + */ + if (field->link.prev == &ftrace_common_fields) { + if (unlikely(list_empty(head))) + return NULL; + field = list_entry(head->prev, struct ftrace_event_field, link); + /* Set the LSB to notify f_show to print an extra newline */ + field = (struct ftrace_event_field *) + ((unsigned long)field | 1); + return field; } + + /* If we are done tell f_show to print the format */ + if (field->link.prev == head) + return (void *)FORMAT_PRINTFMT; + + field = list_entry(field->link.prev, struct ftrace_event_field, link); + + return field; } -static ssize_t -event_format_read(struct file *filp, char __user *ubuf, size_t cnt, - loff_t *ppos) +static void *f_start(struct seq_file *m, loff_t *pos) { - struct ftrace_event_call *call = filp->private_data; - struct list_head *head; - struct trace_seq *s; - char *buf; - int r; + loff_t l = 0; + void *p; - if (*ppos) + /* Start by showing the header */ + if (!*pos) + return (void *)FORMAT_HEADER; + + p = (void *)FORMAT_HEADER; + do { + p = f_next(m, p, &l); + } while (p && l < *pos); + + return p; +} + +static int f_show(struct seq_file *m, void *v) +{ + struct ftrace_event_call *call = m->private; + struct ftrace_event_field *field; + const char *array_descriptor; + + switch ((unsigned long)v) { + case FORMAT_HEADER: + seq_printf(m, "name: %s\n", call->name); + seq_printf(m, "ID: %d\n", call->event.type); + seq_printf(m, "format:\n"); return 0; - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; + case FORMAT_PRINTFMT: + seq_printf(m, "\nprint fmt: %s\n", + call->print_fmt); + return 0; + } - trace_seq_init(s); + /* + * To separate common fields from event fields, the + * LSB is set on the first event field. Clear it and + * print a newline if it is set. + */ + if ((unsigned long)v & 1) { + seq_putc(m, '\n'); + v = (void *)((unsigned long)v & ~1L); + } - trace_seq_printf(s, "name: %s\n", call->name); - trace_seq_printf(s, "ID: %d\n", call->event.type); - trace_seq_printf(s, "format:\n"); + field = v; - /* print common fields */ - print_event_fields(s, &ftrace_common_fields); + /* + * Smartly shows the array type(except dynamic array). + * Normal: + * field:TYPE VAR + * If TYPE := TYPE[LEN], it is shown: + * field:TYPE VAR[LEN] + */ + array_descriptor = strchr(field->type, '['); - trace_seq_putc(s, '\n'); + if (!strncmp(field->type, "__data_loc", 10)) + array_descriptor = NULL; - /* print event specific fields */ - head = trace_get_fields(call); - print_event_fields(s, head); + if (!array_descriptor) + seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", + field->type, field->name, field->offset, + field->size, !!field->is_signed); + else + seq_printf(m, "\tfield:%.*s %s%s;\toffset:%u;\tsize:%u;\tsigned:%d;\n", + (int)(array_descriptor - field->type), + field->type, field->name, + array_descriptor, field->offset, + field->size, !!field->is_signed); - r = trace_seq_printf(s, "\nprint fmt: %s\n", call->print_fmt); + return 0; +} - if (!r) { - /* - * ug! The format output is bigger than a PAGE!! - */ - buf = "FORMAT TOO BIG\n"; - r = simple_read_from_buffer(ubuf, cnt, ppos, - buf, strlen(buf)); - goto out; - } +static void f_stop(struct seq_file *m, void *p) +{ +} - r = simple_read_from_buffer(ubuf, cnt, ppos, - s->buffer, s->len); - out: - kfree(s); - return r; +static const struct seq_operations trace_format_seq_ops = { + .start = f_start, + .next = f_next, + .stop = f_stop, + .show = f_show, +}; + +static int trace_format_open(struct inode *inode, struct file *file) +{ + struct ftrace_event_call *call = inode->i_private; + struct seq_file *m; + int ret; + + ret = seq_open(file, &trace_format_seq_ops); + if (ret < 0) + return ret; + + m = file->private_data; + m->private = call; + + return 0; } static ssize_t @@ -877,8 +954,10 @@ static const struct file_operations ftrace_enable_fops = { }; static const struct file_operations ftrace_event_format_fops = { - .open = tracing_open_generic, - .read = event_format_read, + .open = trace_format_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, }; static const struct file_operations ftrace_event_id_fops = { diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 6bff236..6f23369 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -507,7 +507,15 @@ get_return_for_leaf(struct trace_iterator *iter, * if the output fails. */ data->ent = *curr; - data->ret = *next; + /* + * If the next event is not a return type, then + * we only care about what type it is. Otherwise we can + * safely copy the entire event. + */ + if (next->ent.type == TRACE_GRAPH_RET) + data->ret = *next; + else + data->ret.ent.type = next->ent.type; } } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 9e06b7f..52c2172 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -539,6 +539,19 @@ config PROVE_RCU_REPEATEDLY disabling, allowing multiple RCU-lockdep warnings to be printed on a single reboot. +config SPARSE_RCU_POINTER + bool "RCU debugging: sparse-based checks for pointer usage" + default n + help + This feature enables the __rcu sparse annotation for + RCU-protected pointers. This annotation will cause sparse + to flag any non-RCU used of annotated pointers. This can be + helpful when debugging RCU usage. Please note that this feature + is not intended to enforce code cleanliness; it is instead merely + a debugging aid. + + Say Y to make sparse flag questionable use of RCU-protected pointers + Say N if you are unsure. config LOCKDEP @@ -832,6 +845,30 @@ config RCU_CPU_STALL_DETECTOR Say Y if you are unsure. +config RCU_CPU_STALL_TIMEOUT + int "RCU CPU stall timeout in seconds" + depends on RCU_CPU_STALL_DETECTOR + range 3 300 + default 60 + help + If a given RCU grace period extends more than the specified + number of seconds, a CPU stall warning is printed. If the + RCU grace period persists, additional CPU stall warnings are + printed at more widely spaced intervals. + +config RCU_CPU_STALL_DETECTOR_RUNNABLE + bool "RCU CPU stall checking starts automatically at boot" + depends on RCU_CPU_STALL_DETECTOR + default y + help + If set, start checking for RCU CPU stalls immediately on + boot. Otherwise, RCU CPU stall checking must be manually + enabled. + + Say Y if you are unsure. + + Say N if you wish to suppress RCU CPU stall checking during boot. + config RCU_CPU_STALL_VERBOSE bool "Print additional per-task information for RCU_CPU_STALL_DETECTOR" depends on RCU_CPU_STALL_DETECTOR && TREE_PREEMPT_RCU @@ -994,13 +1031,16 @@ config FAULT_INJECTION_STACKTRACE_FILTER config LATENCYTOP bool "Latency measuring infrastructure" + depends on HAVE_LATENCYTOP_SUPPORT + depends on DEBUG_KERNEL + depends on STACKTRACE_SUPPORT + depends on PROC_FS select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE select KALLSYMS select KALLSYMS_ALL select STACKTRACE select SCHEDSTATS select SCHED_DEBUG - depends on HAVE_LATENCYTOP_SUPPORT help Enable this option if you want to use the LatencyTOP tool to find out which userspace is blocking on what kernel operations. diff --git a/lib/radix-tree.c b/lib/radix-tree.c index e907858..899fb75 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -49,7 +49,7 @@ struct radix_tree_node { unsigned int height; /* Height from the bottom */ unsigned int count; struct rcu_head rcu_head; - void *slots[RADIX_TREE_MAP_SIZE]; + void __rcu *slots[RADIX_TREE_MAP_SIZE]; unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; }; diff --git a/mm/shmem.c b/mm/shmem.c index dfaa0f4..080b09a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2325,7 +2325,10 @@ static int shmem_show_options(struct seq_file *seq, struct vfsmount *vfs) static void shmem_put_super(struct super_block *sb) { - kfree(sb->s_fs_info); + struct shmem_sb_info *sbinfo = SHMEM_SB(sb); + + percpu_counter_destroy(&sbinfo->used_blocks); + kfree(sbinfo); sb->s_fs_info = NULL; } @@ -2367,7 +2370,8 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) #endif spin_lock_init(&sbinfo->stat_lock); - percpu_counter_init(&sbinfo->used_blocks, 0); + if (percpu_counter_init(&sbinfo->used_blocks, 0)) + goto failed; sbinfo->free_inodes = sbinfo->max_inodes; sb->s_maxbytes = SHMEM_MAX_BYTES; diff --git a/net/core/dev.c b/net/core/dev.c index 1ae6543..3721fbb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3143,7 +3143,7 @@ pull: put_page(skb_shinfo(skb)->frags[0].page); memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1, - --skb_shinfo(skb)->nr_frags); + --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); } } diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 6bccba3..51d6c31 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -735,6 +735,7 @@ static void get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + local_bh_disable(); xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, @@ -742,6 +743,7 @@ static void get_counters(const struct xt_table_info *t, ++i; } xt_info_wrunlock(cpu); + local_bh_enable(); } put_cpu(); } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c439721..97b64b2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -909,6 +909,7 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + local_bh_disable(); xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, @@ -916,6 +917,7 @@ get_counters(const struct xt_table_info *t, ++i; /* macro does multi eval of i */ } xt_info_wrunlock(cpu); + local_bh_enable(); } put_cpu(); } diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 8c8632d..957c924 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -38,7 +38,7 @@ static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; #define MAX_IP_NAT_PROTO 256 -static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] +static const struct nf_nat_protocol __rcu *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; static inline const struct nf_nat_protocol * diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 5359ef4..29a7bca 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -922,6 +922,7 @@ get_counters(const struct xt_table_info *t, if (cpu == curcpu) continue; i = 0; + local_bh_disable(); xt_info_wrlock(cpu); xt_entry_foreach(iter, t->entries[cpu], t->size) { ADD_COUNTER(counters[i], iter->counters.bcnt, @@ -929,6 +930,7 @@ get_counters(const struct xt_table_info *t, ++i; } xt_info_wrunlock(cpu); + local_bh_enable(); } put_cpu(); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8f2d040..d126365 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2580,7 +2580,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .proc_handler = proc_dointvec, }, { .procname = "mtu_expires", @@ -2594,7 +2594,7 @@ ctl_table ipv6_route_table_template[] = { .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .proc_handler = proc_dointvec, }, { .procname = "gc_min_interval_ms", diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 78b505d..fdaec7d 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -27,7 +27,7 @@ static DEFINE_MUTEX(afinfo_mutex); -const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; +const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL(nf_afinfo); int nf_register_afinfo(const struct nf_afinfo *afinfo) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index cdcc764..5702de3 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -26,10 +26,10 @@ static DEFINE_MUTEX(nf_ct_ecache_mutex); -struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; +struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); -struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; +struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly; EXPORT_SYMBOL_GPL(nf_expect_event_cb); /* deliver cached events and clear cache entry - must be called with locally diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 7dcf7a4..1d9bdae 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -16,7 +16,7 @@ #include #include -static struct nf_ct_ext_type *nf_ct_ext_types[NF_CT_EXT_NUM]; +static struct nf_ct_ext_type __rcu *nf_ct_ext_types[NF_CT_EXT_NUM]; static DEFINE_MUTEX(nf_ct_ext_type_mutex); void __nf_ct_ext_destroy(struct nf_conn *ct) diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 5886ba1..ed6d929 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -28,8 +28,8 @@ #include #include -static struct nf_conntrack_l4proto **nf_ct_protos[PF_MAX] __read_mostly; -struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX] __read_mostly; +static struct nf_conntrack_l4proto __rcu **nf_ct_protos[PF_MAX] __read_mostly; +struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[AF_MAX] __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_l3protos); static DEFINE_MUTEX(nf_ct_proto_mutex); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 7df37fd..b07393e 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,7 +16,7 @@ #define NF_LOG_PREFIXLEN 128 #define NFLOGGER_NAME_LEN 64 -static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; +static const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 78b3cf9..74aebed 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -18,7 +18,7 @@ * long term mutex. The handler must provide an an outfn() to accept packets * for queueing and must reinject all packets it receives, no matter what. */ -static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly; +static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(queue_handler_mutex); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2cbf380..8648a99 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1406,7 +1406,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); int noblock = flags&MSG_DONTWAIT; size_t copied; - struct sk_buff *skb; + struct sk_buff *skb, *frag __maybe_unused = NULL; int err; if (flags&MSG_OOB) @@ -1441,21 +1441,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, kfree_skb(skb); skb = compskb; } else { - /* - * Before setting frag_list to NULL, we must get a - * private copy of skb if shared (because of MSG_PEEK) - */ - if (skb_shared(skb)) { - struct sk_buff *nskb; - - nskb = pskb_copy(skb, GFP_KERNEL); - kfree_skb(skb); - skb = nskb; - err = -ENOMEM; - if (!skb) - goto out; - } - kfree_skb(skb_shinfo(skb)->frag_list); + frag = skb_shinfo(skb)->frag_list; skb_shinfo(skb)->frag_list = NULL; } } @@ -1492,6 +1478,10 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (flags & MSG_TRUNC) copied = skb->len; +#ifdef CONFIG_COMPAT_NETLINK_MESSAGES + skb_shinfo(skb)->frag_list = frag; +#endif + skb_free_datagram(sk, skb); if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 8406c66..c2ed90a 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -152,21 +152,24 @@ static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); - struct tc_gact opt; struct tcf_gact *gact = a->priv; + struct tc_gact opt = { + .index = gact->tcf_index, + .refcnt = gact->tcf_refcnt - ref, + .bindcnt = gact->tcf_bindcnt - bind, + .action = gact->tcf_action, + }; struct tcf_t t; - opt.index = gact->tcf_index; - opt.refcnt = gact->tcf_refcnt - ref; - opt.bindcnt = gact->tcf_bindcnt - bind; - opt.action = gact->tcf_action; NLA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); #ifdef CONFIG_GACT_PROB if (gact->tcfg_ptype) { - struct tc_gact_p p_opt; - p_opt.paction = gact->tcfg_paction; - p_opt.pval = gact->tcfg_pval; - p_opt.ptype = gact->tcfg_ptype; + struct tc_gact_p p_opt = { + .paction = gact->tcfg_paction, + .pval = gact->tcfg_pval, + .ptype = gact->tcfg_ptype, + }; + NLA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); } #endif diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 11f195a..0c311be 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -219,15 +219,16 @@ static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, i { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = a->priv; - struct tc_mirred opt; + struct tc_mirred opt = { + .index = m->tcf_index, + .action = m->tcf_action, + .refcnt = m->tcf_refcnt - ref, + .bindcnt = m->tcf_bindcnt - bind, + .eaction = m->tcfm_eaction, + .ifindex = m->tcfm_ifindex, + }; struct tcf_t t; - opt.index = m->tcf_index; - opt.action = m->tcf_action; - opt.refcnt = m->tcf_refcnt - ref; - opt.bindcnt = m->tcf_bindcnt - bind; - opt.eaction = m->tcfm_eaction; - opt.ifindex = m->tcfm_ifindex; NLA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 509a2d5..186eb83 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -272,19 +272,19 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_nat *p = a->priv; - struct tc_nat opt; + struct tc_nat opt = { + .old_addr = p->old_addr, + .new_addr = p->new_addr, + .mask = p->mask, + .flags = p->flags, + + .index = p->tcf_index, + .action = p->tcf_action, + .refcnt = p->tcf_refcnt - ref, + .bindcnt = p->tcf_bindcnt - bind, + }; struct tcf_t t; - opt.old_addr = p->old_addr; - opt.new_addr = p->new_addr; - opt.mask = p->mask; - opt.flags = p->flags; - - opt.index = p->tcf_index; - opt.action = p->tcf_action; - opt.refcnt = p->tcf_refcnt - ref; - opt.bindcnt = p->tcf_bindcnt - bind; - NLA_PUT(skb, TCA_NAT_PARMS, sizeof(opt), &opt); t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 4a1d640..97e84f3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -164,13 +164,14 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_defact *d = a->priv; - struct tc_defact opt; + struct tc_defact opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; struct tcf_t t; - opt.index = d->tcf_index; - opt.refcnt = d->tcf_refcnt - ref; - opt.bindcnt = d->tcf_bindcnt - bind; - opt.action = d->tcf_action; NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata); t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index e9607fe..66cbf4e 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -159,13 +159,14 @@ static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, { unsigned char *b = skb_tail_pointer(skb); struct tcf_skbedit *d = a->priv; - struct tc_skbedit opt; + struct tc_skbedit opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; struct tcf_t t; - opt.index = d->tcf_index; - opt.refcnt = d->tcf_refcnt - ref; - opt.bindcnt = d->tcf_bindcnt - bind; - opt.action = d->tcf_action; NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt); if (d->flags & SKBEDIT_F_PRIORITY) NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority), diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 443c161..3376d76 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -18,10 +18,11 @@ config SUNRPC_XPRT_RDMA If unsure, say N. config RPCSEC_GSS_KRB5 - tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL + tristate + depends on SUNRPC && CRYPTO + prompt "Secure RPC: Kerberos V mechanism" if !(NFS_V4 || NFSD_V4) + default y select SUNRPC_GSS - select CRYPTO select CRYPTO_MD5 select CRYPTO_DES select CRYPTO_CBC @@ -34,7 +35,7 @@ config RPCSEC_GSS_KRB5 available from http://linux-nfs.org/. In addition, user-space Kerberos support should be installed. - If unsure, say N. + If unsure, say Y. config RPCSEC_GSS_SPKM3 tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e5e28d1..2ac3f6e 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -249,6 +249,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, req->rl_nchunks = nchunks; BUG_ON(nchunks == 0); + BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR) + && (nchunks > 3)); /* * finish off header. If write, marshal discrim and nchunks. diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 27015c6..5f4c7b3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -650,10 +650,22 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_send_wr = cdata->max_requests; switch (ia->ri_memreg_strategy) { case RPCRDMA_FRMR: - /* Add room for frmr register and invalidate WRs */ - ep->rep_attr.cap.max_send_wr *= 3; - if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) - return -EINVAL; + /* Add room for frmr register and invalidate WRs. + * 1. FRMR reg WR for head + * 2. FRMR invalidate WR for head + * 3. FRMR reg WR for pagelist + * 4. FRMR invalidate WR for pagelist + * 5. FRMR reg WR for tail + * 6. FRMR invalidate WR for tail + * 7. The RDMA_SEND WR + */ + ep->rep_attr.cap.max_send_wr *= 7; + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) { + cdata->max_requests = devattr.max_qp_wr / 7; + if (!cdata->max_requests) + return -EINVAL; + ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7; + } break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: @@ -1490,7 +1502,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, memset(&frmr_wr, 0, sizeof frmr_wr); frmr_wr.opcode = IB_WR_FAST_REG_MR; frmr_wr.send_flags = 0; /* unsignaled */ - frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; + frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma; frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; frmr_wr.wr.fast_reg.page_list_len = i; frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 49a62f0..b6309db 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1305,10 +1305,11 @@ static void xs_tcp_state_change(struct sock *sk) if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: xs_tcp_state_change client %p...\n", xprt); - dprintk("RPC: state %x conn %d dead %d zapped %d\n", + dprintk("RPC: state %x conn %d dead %d zapped %d sk_shutdown %d\n", sk->sk_state, xprt_connected(xprt), sock_flag(sk, SOCK_DEAD), - sock_flag(sk, SOCK_ZAPPED)); + sock_flag(sk, SOCK_ZAPPED), + sk->sk_shutdown); switch (sk->sk_state) { case TCP_ESTABLISHED: @@ -1779,10 +1780,25 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *tra { unsigned int state = transport->inet->sk_state; - if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) - return; - if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) - return; + if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) { + /* we don't need to abort the connection if the socket + * hasn't undergone a shutdown + */ + if (transport->inet->sk_shutdown == 0) + return; + dprintk("RPC: %s: TCP_CLOSEd and sk_shutdown set to %d\n", + __func__, transport->inet->sk_shutdown); + } + if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) { + /* we don't need to abort the connection if the socket + * hasn't undergone a shutdown + */ + if (transport->inet->sk_shutdown == 0) + return; + dprintk("RPC: %s: ESTABLISHED/SYN_SENT " + "sk_shutdown set to %d\n", + __func__, transport->inet->sk_shutdown); + } xs_abort_connection(xprt, transport); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index ba59983..b14ed4b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2504,7 +2504,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, if (p->dir > XFRM_POLICY_OUT) return NULL; - xp = xfrm_policy_alloc(net, GFP_KERNEL); + xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 0171060..e67f054 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -159,6 +159,7 @@ my $section_regex; # Find the start of a section my $function_regex; # Find the name of a function # (return offset and func name) my $mcount_regex; # Find the call site to mcount (return offset) +my $mcount_adjust; # Address adjustment to mcount offset my $alignment; # The .align value to use for $mcount_section my $section_type; # Section header plus possible alignment command my $can_use_local = 0; # If we can use local function references @@ -213,6 +214,7 @@ $section_regex = "Disassembly of section\\s+(\\S+):"; $function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount\$"; $section_type = '@progbits'; +$mcount_adjust = 0; $type = ".long"; if ($arch eq "x86_64") { @@ -351,6 +353,9 @@ if ($arch eq "x86_64") { } elsif ($arch eq "microblaze") { # Microblaze calls '_mcount' instead of plain 'mcount'. $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$"; +} elsif ($arch eq "blackfin") { + $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s__mcount\$"; + $mcount_adjust = -4; } else { die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; } @@ -511,7 +516,7 @@ while () { } # is this a call site to mcount? If so, record it to print later if ($text_found && /$mcount_regex/) { - push(@offsets, hex $1); + push(@offsets, (hex $1) + $mcount_adjust); } } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index d5666d3..f73e2c2 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -607,8 +607,8 @@ static int apparmor_setprocattr(struct task_struct *task, char *name, return error; } -static int apparmor_task_setrlimit(unsigned int resource, - struct rlimit *new_rlim) +static int apparmor_task_setrlimit(struct task_struct *task, + unsigned int resource, struct rlimit *new_rlim) { struct aa_profile *profile = aa_current_profile(); int error = 0; diff --git a/security/apparmor/path.c b/security/apparmor/path.c index 96bab94..19358dc 100644 --- a/security/apparmor/path.c +++ b/security/apparmor/path.c @@ -62,19 +62,14 @@ static int d_namespace_path(struct path *path, char *buf, int buflen, int deleted, connected; int error = 0; - /* Get the root we want to resolve too */ + /* Get the root we want to resolve too, released below */ if (flags & PATH_CHROOT_REL) { /* resolve paths relative to chroot */ - read_lock(¤t->fs->lock); - root = current->fs->root; - /* released below */ - path_get(&root); - read_unlock(¤t->fs->lock); + get_fs_root(current->fs, &root); } else { /* resolve paths relative to namespace */ root.mnt = current->nsproxy->mnt_ns->root; root.dentry = root.mnt->mnt_root; - /* released below */ path_get(&root); } diff --git a/security/commoncap.c b/security/commoncap.c index 4e01599..9d172e6 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -40,7 +40,7 @@ * * Warn if that happens, once per boot. */ -static void warn_setuid_and_fcaps_mixed(char *fname) +static void warn_setuid_and_fcaps_mixed(const char *fname) { static int warned; if (!warned) { diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 42043f9..4796ddd 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2170,8 +2170,9 @@ static inline void flush_unauthorized_files(const struct cred *cred, tty = get_current_tty(); if (tty) { - file_list_lock(); + spin_lock(&tty_files_lock); if (!list_empty(&tty->tty_files)) { + struct tty_file_private *file_priv; struct inode *inode; /* Revalidate access to controlling tty. @@ -2179,14 +2180,16 @@ static inline void flush_unauthorized_files(const struct cred *cred, than using file_has_perm, as this particular open file may belong to another process and we are only interested in the inode-based check here. */ - file = list_first_entry(&tty->tty_files, struct file, f_u.fu_list); + file_priv = list_first_entry(&tty->tty_files, + struct tty_file_private, list); + file = file_priv->file; inode = file->f_path.dentry->d_inode; if (inode_has_perm(cred, inode, FILE__READ | FILE__WRITE, NULL)) { drop_tty = 1; } } - file_list_unlock(); + spin_unlock(&tty_files_lock); tty_kref_put(tty); } /* Reset controlling tty. */ diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index a3b2a64..134fc6c 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -978,6 +978,10 @@ static int snd_pcm_do_pause(struct snd_pcm_substream *substream, int push) { if (substream->runtime->trigger_master != substream) return 0; + /* some drivers might use hw_ptr to recover from the pause - + update the hw_ptr now */ + if (push) + snd_pcm_update_hw_ptr(substream); /* The jiffies check in snd_pcm_update_hw_ptr*() is done by * a delta betwen the current jiffies, this gives a large enough * delta, effectively to skip the check once. diff --git a/sound/pci/emu10k1/emu10k1.c b/sound/pci/emu10k1/emu10k1.c index 4203782..aff8387 100644 --- a/sound/pci/emu10k1/emu10k1.c +++ b/sound/pci/emu10k1/emu10k1.c @@ -52,6 +52,7 @@ static int max_synth_voices[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 64}; static int max_buffer_size[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 128}; static int enable_ir[SNDRV_CARDS]; static uint subsystem[SNDRV_CARDS]; /* Force card subsystem model */ +static uint delay_pcm_irq[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = 2}; module_param_array(index, int, NULL, 0444); MODULE_PARM_DESC(index, "Index value for the EMU10K1 soundcard."); @@ -73,6 +74,8 @@ module_param_array(enable_ir, bool, NULL, 0444); MODULE_PARM_DESC(enable_ir, "Enable IR."); module_param_array(subsystem, uint, NULL, 0444); MODULE_PARM_DESC(subsystem, "Force card subsystem model."); +module_param_array(delay_pcm_irq, uint, NULL, 0444); +MODULE_PARM_DESC(delay_pcm_irq, "Delay PCM interrupt by specified number of samples (default 0)."); /* * Class 0401: 1102:0008 (rev 00) Subsystem: 1102:1001 -> Audigy2 Value Model:SB0400 */ @@ -127,6 +130,7 @@ static int __devinit snd_card_emu10k1_probe(struct pci_dev *pci, &emu)) < 0) goto error; card->private_data = emu; + emu->delay_pcm_irq = delay_pcm_irq[dev] & 0x1f; if ((err = snd_emu10k1_pcm(emu, 0, NULL)) < 0) goto error; if ((err = snd_emu10k1_pcm_mic(emu, 1, NULL)) < 0) diff --git a/sound/pci/emu10k1/emupcm.c b/sound/pci/emu10k1/emupcm.c index 55b83ef..622bace 100644 --- a/sound/pci/emu10k1/emupcm.c +++ b/sound/pci/emu10k1/emupcm.c @@ -332,7 +332,7 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, evoice->epcm->ccca_start_addr = start_addr + ccis; if (extra) { start_addr += ccis; - end_addr += ccis; + end_addr += ccis + emu->delay_pcm_irq; } if (stereo && !extra) { snd_emu10k1_ptr_write(emu, CPF, voice, CPF_STEREO_MASK); @@ -360,7 +360,9 @@ static void snd_emu10k1_pcm_init_voice(struct snd_emu10k1 *emu, /* Assumption that PT is already 0 so no harm overwriting */ snd_emu10k1_ptr_write(emu, PTRX, voice, (send_amount[0] << 8) | send_amount[1]); snd_emu10k1_ptr_write(emu, DSL, voice, end_addr | (send_amount[3] << 24)); - snd_emu10k1_ptr_write(emu, PSST, voice, start_addr | (send_amount[2] << 24)); + snd_emu10k1_ptr_write(emu, PSST, voice, + (start_addr + (extra ? emu->delay_pcm_irq : 0)) | + (send_amount[2] << 24)); if (emu->card_capabilities->emu_model) pitch_target = PITCH_48000; /* Disable interpolators on emu1010 card */ else @@ -732,6 +734,23 @@ static void snd_emu10k1_playback_stop_voice(struct snd_emu10k1 *emu, struct snd_ snd_emu10k1_ptr_write(emu, IP, voice, 0); } +static inline void snd_emu10k1_playback_mangle_extra(struct snd_emu10k1 *emu, + struct snd_emu10k1_pcm *epcm, + struct snd_pcm_substream *substream, + struct snd_pcm_runtime *runtime) +{ + unsigned int ptr, period_pos; + + /* try to sychronize the current position for the interrupt + source voice */ + period_pos = runtime->status->hw_ptr - runtime->hw_ptr_interrupt; + period_pos %= runtime->period_size; + ptr = snd_emu10k1_ptr_read(emu, CCCA, epcm->extra->number); + ptr &= ~0x00ffffff; + ptr |= epcm->ccca_start_addr + period_pos; + snd_emu10k1_ptr_write(emu, CCCA, epcm->extra->number, ptr); +} + static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream, int cmd) { @@ -753,6 +772,8 @@ static int snd_emu10k1_playback_trigger(struct snd_pcm_substream *substream, /* follow thru */ case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: case SNDRV_PCM_TRIGGER_RESUME: + if (cmd == SNDRV_PCM_TRIGGER_PAUSE_RELEASE) + snd_emu10k1_playback_mangle_extra(emu, epcm, substream, runtime); mix = &emu->pcm_mixer[substream->number]; snd_emu10k1_playback_prepare_voice(emu, epcm->voices[0], 1, 0, mix); snd_emu10k1_playback_prepare_voice(emu, epcm->voices[1], 0, 0, mix); @@ -869,8 +890,9 @@ static snd_pcm_uframes_t snd_emu10k1_playback_pointer(struct snd_pcm_substream * #endif /* printk(KERN_DEBUG - "ptr = 0x%x, buffer_size = 0x%x, period_size = 0x%x\n", - ptr, runtime->buffer_size, runtime->period_size); + "ptr = 0x%lx, buffer_size = 0x%lx, period_size = 0x%lx\n", + (long)ptr, (long)runtime->buffer_size, + (long)runtime->period_size); */ return ptr; } diff --git a/sound/pci/emu10k1/memory.c b/sound/pci/emu10k1/memory.c index ffb1ddb..957a311 100644 --- a/sound/pci/emu10k1/memory.c +++ b/sound/pci/emu10k1/memory.c @@ -310,8 +310,10 @@ snd_emu10k1_alloc_pages(struct snd_emu10k1 *emu, struct snd_pcm_substream *subst if (snd_BUG_ON(!hdr)) return NULL; + idx = runtime->period_size >= runtime->buffer_size ? + (emu->delay_pcm_irq * 2) : 0; mutex_lock(&hdr->block_mutex); - blk = search_empty(emu, runtime->dma_bytes); + blk = search_empty(emu, runtime->dma_bytes + idx); if (blk == NULL) { mutex_unlock(&hdr->block_mutex); return NULL; diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 31b5d9e..c424952 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3049,6 +3049,7 @@ static struct snd_pci_quirk cxt5066_cfg_tbl[] = { SND_PCI_QUIRK(0x1028, 0x02f5, "Dell", CXT5066_DELL_LAPTOP), SND_PCI_QUIRK(0x152d, 0x0833, "OLPC XO-1.5", CXT5066_OLPC_XO_1_5), + SND_PCI_QUIRK(0x1028, 0x02d8, "Dell Vostro", CXT5066_DELL_VOSTO), SND_PCI_QUIRK(0x1028, 0x0402, "Dell Vostro", CXT5066_DELL_VOSTO), SND_PCI_QUIRK(0x1028, 0x0408, "Dell Inspiron One 19T", CXT5066_IDEAPAD), SND_PCI_QUIRK(0x1179, 0xff50, "Toshiba Satellite P500-PSPGSC-01800T", CXT5066_OLPC_XO_1_5), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2cd1ae8..a4dd045 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -19030,6 +19030,7 @@ static int patch_alc888(struct hda_codec *codec) /* * ALC680 support */ +#define ALC680_DIGIN_NID ALC880_DIGIN_NID #define ALC680_DIGOUT_NID ALC880_DIGOUT_NID #define alc680_modes alc260_modes @@ -19044,23 +19045,93 @@ static hda_nid_t alc680_adc_nids[3] = { 0x07, 0x08, 0x09 }; +/* + * Analog capture ADC cgange + */ +static int alc680_capture_pcm_prepare(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + unsigned int stream_tag, + unsigned int format, + struct snd_pcm_substream *substream) +{ + struct alc_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + unsigned int pre_mic, pre_line; + + pre_mic = snd_hda_jack_detect(codec, cfg->input_pins[AUTO_PIN_MIC]); + pre_line = snd_hda_jack_detect(codec, cfg->input_pins[AUTO_PIN_LINE]); + + spec->cur_adc_stream_tag = stream_tag; + spec->cur_adc_format = format; + + if (pre_mic || pre_line) { + if (pre_mic) + snd_hda_codec_setup_stream(codec, 0x08, stream_tag, 0, + format); + else + snd_hda_codec_setup_stream(codec, 0x09, stream_tag, 0, + format); + } else + snd_hda_codec_setup_stream(codec, 0x07, stream_tag, 0, format); + return 0; +} + +static int alc680_capture_pcm_cleanup(struct hda_pcm_stream *hinfo, + struct hda_codec *codec, + struct snd_pcm_substream *substream) +{ + snd_hda_codec_cleanup_stream(codec, 0x07); + snd_hda_codec_cleanup_stream(codec, 0x08); + snd_hda_codec_cleanup_stream(codec, 0x09); + return 0; +} + +static struct hda_pcm_stream alc680_pcm_analog_auto_capture = { + .substreams = 1, /* can be overridden */ + .channels_min = 2, + .channels_max = 2, + /* NID is set in alc_build_pcms */ + .ops = { + .prepare = alc680_capture_pcm_prepare, + .cleanup = alc680_capture_pcm_cleanup + }, +}; + static struct snd_kcontrol_new alc680_base_mixer[] = { /* output mixer control */ HDA_CODEC_VOLUME("Front Playback Volume", 0x2, 0x0, HDA_OUTPUT), HDA_CODEC_MUTE("Front Playback Switch", 0x14, 0x0, HDA_OUTPUT), HDA_CODEC_VOLUME("Headphone Playback Volume", 0x4, 0x0, HDA_OUTPUT), HDA_CODEC_MUTE("Headphone Playback Switch", 0x16, 0x0, HDA_OUTPUT), + HDA_CODEC_VOLUME("Int Mic Boost", 0x12, 0, HDA_INPUT), HDA_CODEC_VOLUME("Mic Boost", 0x18, 0, HDA_INPUT), + HDA_CODEC_VOLUME("Line In Boost", 0x19, 0, HDA_INPUT), { } }; -static struct snd_kcontrol_new alc680_capture_mixer[] = { - HDA_CODEC_VOLUME("Capture Volume", 0x07, 0x0, HDA_INPUT), - HDA_CODEC_MUTE("Capture Switch", 0x07, 0x0, HDA_INPUT), - HDA_CODEC_VOLUME_IDX("Capture Volume", 1, 0x08, 0x0, HDA_INPUT), - HDA_CODEC_MUTE_IDX("Capture Switch", 1, 0x08, 0x0, HDA_INPUT), - HDA_CODEC_VOLUME_IDX("Capture Volume", 2, 0x09, 0x0, HDA_INPUT), - HDA_CODEC_MUTE_IDX("Capture Switch", 2, 0x09, 0x0, HDA_INPUT), +static struct hda_bind_ctls alc680_bind_cap_vol = { + .ops = &snd_hda_bind_vol, + .values = { + HDA_COMPOSE_AMP_VAL(0x07, 3, 0, HDA_INPUT), + HDA_COMPOSE_AMP_VAL(0x08, 3, 0, HDA_INPUT), + HDA_COMPOSE_AMP_VAL(0x09, 3, 0, HDA_INPUT), + 0 + }, +}; + +static struct hda_bind_ctls alc680_bind_cap_switch = { + .ops = &snd_hda_bind_sw, + .values = { + HDA_COMPOSE_AMP_VAL(0x07, 3, 0, HDA_INPUT), + HDA_COMPOSE_AMP_VAL(0x08, 3, 0, HDA_INPUT), + HDA_COMPOSE_AMP_VAL(0x09, 3, 0, HDA_INPUT), + 0 + }, +}; + +static struct snd_kcontrol_new alc680_master_capture_mixer[] = { + HDA_BIND_VOL("Capture Volume", &alc680_bind_cap_vol), + HDA_BIND_SW("Capture Switch", &alc680_bind_cap_switch), { } /* end */ }; @@ -19068,25 +19139,73 @@ static struct snd_kcontrol_new alc680_capture_mixer[] = { * generic initialization of ADC, input mixers and output mixers */ static struct hda_verb alc680_init_verbs[] = { - /* Unmute DAC0-1 and set vol = 0 */ - {0x02, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO}, - {0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO}, - {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_ZERO}, + {0x02, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x03, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, + {0x04, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE}, - {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40}, - {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x40}, - {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0}, - {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24}, - {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20}, + {0x12, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN}, + {0x14, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT}, + {0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT}, + {0x16, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP}, + {0x18, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_VREF80}, + {0x19, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_IN}, {0x14, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, {0x15, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, {0x16, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, {0x18, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, {0x19, AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE}, + + {0x16, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_HP_EVENT | AC_USRSP_EN}, + {0x18, AC_VERB_SET_UNSOLICITED_ENABLE, ALC880_MIC_EVENT | AC_USRSP_EN}, + { } }; +/* toggle speaker-output according to the hp-jack state */ +static void alc680_base_setup(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + spec->autocfg.hp_pins[0] = 0x16; + spec->autocfg.speaker_pins[0] = 0x14; + spec->autocfg.speaker_pins[1] = 0x15; + spec->autocfg.input_pins[AUTO_PIN_MIC] = 0x18; + spec->autocfg.input_pins[AUTO_PIN_LINE] = 0x19; +} + +static void alc680_rec_autoswitch(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + unsigned int present; + hda_nid_t new_adc; + + present = snd_hda_jack_detect(codec, cfg->input_pins[AUTO_PIN_MIC]); + + new_adc = present ? 0x8 : 0x7; + __snd_hda_codec_cleanup_stream(codec, !present ? 0x8 : 0x7, 1); + snd_hda_codec_setup_stream(codec, new_adc, + spec->cur_adc_stream_tag, 0, + spec->cur_adc_format); + +} + +static void alc680_unsol_event(struct hda_codec *codec, + unsigned int res) +{ + if ((res >> 26) == ALC880_HP_EVENT) + alc_automute_amp(codec); + if ((res >> 26) == ALC880_MIC_EVENT) + alc680_rec_autoswitch(codec); +} + +static void alc680_inithook(struct hda_codec *codec) +{ + alc_automute_amp(codec); + alc680_rec_autoswitch(codec); +} + /* create input playback/capture controls for the given pin */ static int alc680_new_analog_output(struct alc_spec *spec, hda_nid_t nid, const char *ctlname, int idx) @@ -19197,13 +19316,7 @@ static void alc680_auto_init_hp_out(struct hda_codec *codec) #define alc680_pcm_analog_capture alc880_pcm_analog_capture #define alc680_pcm_analog_alt_capture alc880_pcm_analog_alt_capture #define alc680_pcm_digital_playback alc880_pcm_digital_playback - -static struct hda_input_mux alc680_capture_source = { - .num_items = 1, - .items = { - { "Mic", 0x0 }, - }, -}; +#define alc680_pcm_digital_capture alc880_pcm_digital_capture /* * BIOS auto configuration @@ -19218,6 +19331,7 @@ static int alc680_parse_auto_config(struct hda_codec *codec) alc680_ignore); if (err < 0) return err; + if (!spec->autocfg.line_outs) { if (spec->autocfg.dig_outs || spec->autocfg.dig_in_pin) { spec->multiout.max_channels = 2; @@ -19239,8 +19353,6 @@ static int alc680_parse_auto_config(struct hda_codec *codec) add_mixer(spec, spec->kctls.list); add_verb(spec, alc680_init_verbs); - spec->num_mux_defs = 1; - spec->input_mux = &alc680_capture_source; err = alc_auto_add_mic_boost(codec); if (err < 0) @@ -19279,17 +19391,17 @@ static struct snd_pci_quirk alc680_cfg_tbl[] = { static struct alc_config_preset alc680_presets[] = { [ALC680_BASE] = { .mixers = { alc680_base_mixer }, - .cap_mixer = alc680_capture_mixer, + .cap_mixer = alc680_master_capture_mixer, .init_verbs = { alc680_init_verbs }, .num_dacs = ARRAY_SIZE(alc680_dac_nids), .dac_nids = alc680_dac_nids, - .num_adc_nids = ARRAY_SIZE(alc680_adc_nids), - .adc_nids = alc680_adc_nids, - .hp_nid = 0x04, .dig_out_nid = ALC680_DIGOUT_NID, .num_channel_mode = ARRAY_SIZE(alc680_modes), .channel_mode = alc680_modes, - .input_mux = &alc680_capture_source, + .unsol_event = alc680_unsol_event, + .setup = alc680_base_setup, + .init_hook = alc680_inithook, + }, }; @@ -19333,9 +19445,9 @@ static int patch_alc680(struct hda_codec *codec) setup_preset(codec, &alc680_presets[board_config]); spec->stream_analog_playback = &alc680_pcm_analog_playback; - spec->stream_analog_capture = &alc680_pcm_analog_capture; - spec->stream_analog_alt_capture = &alc680_pcm_analog_alt_capture; + spec->stream_analog_capture = &alc680_pcm_analog_auto_capture; spec->stream_digital_playback = &alc680_pcm_digital_playback; + spec->stream_digital_capture = &alc680_pcm_digital_capture; if (!spec->adc_nids) { spec->adc_nids = alc680_adc_nids; diff --git a/sound/pci/riptide/riptide.c b/sound/pci/riptide/riptide.c index f64fb7d..ad5202e 100644 --- a/sound/pci/riptide/riptide.c +++ b/sound/pci/riptide/riptide.c @@ -1224,15 +1224,14 @@ static int try_to_load_firmware(struct cmdif *cif, struct snd_riptide *chip) firmware.firmware.ASIC, firmware.firmware.CODEC, firmware.firmware.AUXDSP, firmware.firmware.PROG); + if (!chip) + return 1; + for (i = 0; i < FIRMWARE_VERSIONS; i++) { if (!memcmp(&firmware_versions[i], &firmware, sizeof(firmware))) - break; - } - if (i >= FIRMWARE_VERSIONS) - return 0; /* no match */ + return 1; /* OK */ - if (!chip) - return 1; /* OK */ + } snd_printdd("Writing Firmware\n"); if (!chip->fw_entry) { diff --git a/sound/soc/codecs/wm8776.c b/sound/soc/codecs/wm8776.c index 4e212ed..f8154e6 100644 --- a/sound/soc/codecs/wm8776.c +++ b/sound/soc/codecs/wm8776.c @@ -178,13 +178,6 @@ static int wm8776_set_fmt(struct snd_soc_dai *dai, unsigned int fmt) case SND_SOC_DAIFMT_LEFT_J: iface |= 0x0001; break; - /* FIXME: CHECK A/B */ - case SND_SOC_DAIFMT_DSP_A: - iface |= 0x0003; - break; - case SND_SOC_DAIFMT_DSP_B: - iface |= 0x0007; - break; default: return -EINVAL; } diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 41abb90..dcb9700 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -157,10 +157,6 @@ all:: # # Define NO_DWARF if you do not want debug-info analysis feature at all. -$(shell sh -c 'mkdir -p $(OUTPUT)scripts/{perl,python}/Perf-Trace-Util/' 2> /dev/null) -$(shell sh -c 'mkdir -p $(OUTPUT)util/{ui/browsers,scripting-engines}/' 2> /dev/null) -$(shell sh -c 'mkdir $(OUTPUT)bench' 2> /dev/null) - $(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) -include $(OUTPUT)PERF-VERSION-FILE @@ -186,8 +182,6 @@ ifeq ($(ARCH),x86_64) ARCH := x86 endif -$(shell sh -c 'mkdir -p $(OUTPUT)arch/$(ARCH)/util/' 2> /dev/null) - # CFLAGS and LDFLAGS are for the users to override from the command line. # @@ -268,6 +262,7 @@ export prefix bindir sharedir sysconfdir CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar RM = rm -f +MKDIR = mkdir TAR = tar FIND = find INSTALL = install @@ -838,6 +833,7 @@ ifndef V QUIET_CC = @echo ' ' CC $@; QUIET_AR = @echo ' ' AR $@; QUIET_LINK = @echo ' ' LINK $@; + QUIET_MKDIR = @echo ' ' MKDIR $@; QUIET_BUILT_IN = @echo ' ' BUILTIN $@; QUIET_GEN = @echo ' ' GEN $@; QUIET_SUBDIR0 = +@subdir= @@ -1012,6 +1008,14 @@ $(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) $(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) builtin-revert.o wt-status.o: wt-status.h +# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So +# we depend the various files onto their directories. +DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h +$(DIRECTORY_DEPS): $(sort $(dir $(DIRECTORY_DEPS))) +# In the second step, we make a rule to actually create these directories +$(sort $(dir $(DIRECTORY_DEPS))): + $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null + $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) diff --git a/tools/perf/util/ui/browsers/annotate.c b/tools/perf/util/ui/browsers/annotate.c index 55ff792..a90273e 100644 --- a/tools/perf/util/ui/browsers/annotate.c +++ b/tools/perf/util/ui/browsers/annotate.c @@ -146,6 +146,7 @@ static int annotate_browser__run(struct annotate_browser *self, return -1; newtFormAddHotKey(self->b.form, NEWT_KEY_LEFT); + newtFormAddHotKey(self->b.form, NEWT_KEY_RIGHT); nd = self->curr_hot; if (nd) { @@ -178,7 +179,7 @@ static int annotate_browser__run(struct annotate_browser *self, } out: ui_browser__hide(&self->b); - return 0; + return es->u.key; } int hist_entry__tui_annotate(struct hist_entry *self)