From 5bd078dda4d4fbdb4bd138a6bd5b6e274c019ed2 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 14 Sep 2011 11:31:36 -0500 Subject: irq: Add declaration of irq_domain_simple_ops to irqdomain.h irq_domain_simple_ops is exported, but is not declared in irqdomain.h, so add it. Signed-off-by: Rob Herring Cc: Grant Likely Cc: marc.zyngier@arm.com Cc: thomas.abraham@linaro.org Cc: jamie@jamieiles.com Cc: b-cousson@ti.com Cc: shawn.guo@linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: devicetree-discuss@lists.ozlabs.org Link: http://lkml.kernel.org/r/1316017900-19918-2-git-send-email-robherring2@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index e807ad687a0..3ad553e8eae 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -80,6 +80,7 @@ extern void irq_domain_del(struct irq_domain *domain); #endif /* CONFIG_IRQ_DOMAIN */ #if defined(CONFIG_IRQ_DOMAIN) && defined(CONFIG_OF_IRQ) +extern struct irq_domain_ops irq_domain_simple_ops; extern void irq_domain_add_simple(struct device_node *controller, int irq_base); extern void irq_domain_generate_simple(const struct of_device_id *match, u64 phys_base, unsigned int irq_start); -- cgit v1.2.3-70-g09d2 From b6cf8788a3382c2000743a0e393bcc8aeb0601cb Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 20 Sep 2011 17:07:29 +0200 Subject: [S390] kvm: extension capability for new address space layout 598841ca9919d008b520114d8a4378c4ce4e40a1 ([S390] use gmap address spaces for kvm guest images) changed kvm on s390 to use a separate address space for kvm guests. We can now put KVM guests anywhere in the user address mode with a size up to 8PB - as long as the memory is 1MB-aligned. This change was done without KVM extension capability bit. The change was added after 3.0, but we still have a chance to add a feature bit before 3.1 (keeping the releases in a sane state). We use number 71 to avoid collisions with other pending kvm patches as requested by Alexander Graf. Signed-off-by: Christian Borntraeger Acked-by: Avi Kivity Cc: Alexander Graf Signed-off-by: Heiko Carstens --- arch/s390/kvm/kvm-s390.c | 1 + include/linux/kvm.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b4eced131e5..dc2b580e27b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -123,6 +123,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_S390_PSW: + case KVM_CAP_S390_GMAP: r = 1; break; default: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 2c366b52f50..aace6b8691a 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -553,6 +553,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_SPAPR_TCE 63 #define KVM_CAP_PPC_SMT 64 #define KVM_CAP_PPC_RMA 65 +#define KVM_CAP_S390_GMAP 71 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3-70-g09d2 From d94c177beeb4469cd4f6e83354ab0223353e98ed Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 26 Sep 2011 17:44:55 -0700 Subject: vfs pathname lookup: Add LOOKUP_AUTOMOUNT flag Since we've now turned around and made LOOKUP_FOLLOW *not* force an automount, we want to add the ability to force an automount event on lookup even if we don't happen to have one of the other flags that force it implicitly (LOOKUP_OPEN, LOOKUP_DIRECTORY, LOOKUP_PARENT..) Most cases will never want to use this, since you'd normally want to delay automounting as long as possible, which usually implies LOOKUP_OPEN (when we open a file or directory, we really cannot avoid the automount any more). But Trond argued sufficiently forcefully that at a minimum bind mounting a file and quotactl will want to force the automount lookup. Some other cases (like nfs_follow_remote_path()) could use it too, although LOOKUP_DIRECTORY would work there as well. This commit just adds the flag and logic, no users yet, though. It also doesn't actually touch the LOOKUP_NO_AUTOMOUNT flag that is related, and was made irrelevant by the same change that made us not follow on LOOKUP_FOLLOW. Cc: Trond Myklebust Cc: Ian Kent Cc: Jeff Layton Cc: Miklos Szeredi Cc: David Howells Cc: Al Viro Cc: Greg KH Signed-off-by: Linus Torvalds --- fs/namei.c | 2 +- include/linux/namei.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index f4788365ea2..09606fd83d5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -739,7 +739,7 @@ static int follow_automount(struct path *path, unsigned flags, * of the daemon to instantiate them before they can be used. */ if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE)) && + LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && path->dentry->d_inode) return -EISDIR; diff --git a/include/linux/namei.h b/include/linux/namei.h index 76fe2c62ae7..e13dac7caab 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -48,6 +48,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; */ #define LOOKUP_FOLLOW 0x0001 #define LOOKUP_DIRECTORY 0x0002 +#define LOOKUP_AUTOMOUNT 0x0004 #define LOOKUP_PARENT 0x0010 #define LOOKUP_REVAL 0x0020 -- cgit v1.2.3-70-g09d2 From b6c8069d3577481390b3f24a8434ad72a3235594 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 27 Sep 2011 08:12:33 -0700 Subject: vfs: remove LOOKUP_NO_AUTOMOUNT flag That flag no longer makes sense, since we don't look up automount points as eagerly any more. Additionally, it turns out that the NO_AUTOMOUNT handling was buggy to begin with: it would avoid automounting even for cases where we really *needed* to do the automount handling, and could return ENOENT for autofs entries that hadn't been instantiated yet. With our new non-eager automount semantics, one discussion has been about adding a AT_AUTOMOUNT flag to vfs_fstatat (and thus the newfstatat() and fstatat64() system calls), but it's probably not worth it: you can always force at least directory automounting by simply adding the final '/' to the filename, which works for *all* of the stat family system calls, old and new. So AT_NO_AUTOMOUNT (and thus LOOKUP_NO_AUTOMOUNT) really were just a result of our bad default behavior. Acked-by: Ian Kent Acked-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/namei.c | 6 ------ fs/stat.c | 2 -- include/linux/namei.h | 2 +- 3 files changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 09606fd83d5..0b3138de2a3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -721,12 +721,6 @@ static int follow_automount(struct path *path, unsigned flags, if (!path->dentry->d_op || !path->dentry->d_op->d_automount) return -EREMOTE; - /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT - * and this is the terminal part of the path. - */ - if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT)) - return -EISDIR; /* we actually want to stop here */ - /* We don't want to mount if someone's just doing a stat - * unless they're stat'ing a directory and appended a '/' to * the name. diff --git a/fs/stat.c b/fs/stat.c index ba5316ffac6..78a3aa83c7e 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -81,8 +81,6 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, if (!(flag & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; - if (flag & AT_NO_AUTOMOUNT) - lookup_flags |= LOOKUP_NO_AUTOMOUNT; if (flag & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; diff --git a/include/linux/namei.h b/include/linux/namei.h index e13dac7caab..409328d1cbb 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -53,7 +53,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_PARENT 0x0010 #define LOOKUP_REVAL 0x0020 #define LOOKUP_RCU 0x0040 -#define LOOKUP_NO_AUTOMOUNT 0x0080 + /* * Intent data */ -- cgit v1.2.3-70-g09d2 From f75159e9936143177b442afc78150b7a7ad8aa07 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Tue, 20 Sep 2011 01:25:41 +0000 Subject: ptp: fix L2 event message recognition The IEEE 1588 standard defines two kinds of messages, event and general messages. Event messages require time stamping, and general do not. When using UDP transport, two separate ports are used for the two message types. The BPF designed to recognize event messages incorrectly classifies L2 general messages as event messages. This commit fixes the issue by extending the filter to check the message type field for L2 PTP packets. Event messages are be distinguished from general messages by testing the "general" bit. Signed-off-by: Richard Cochran Cc: Signed-off-by: David S. Miller --- include/linux/ptp_classify.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index e07e2742a86..1dc420ba213 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -51,6 +51,7 @@ #define PTP_CLASS_V2_VLAN (PTP_CLASS_V2 | PTP_CLASS_VLAN) #define PTP_EV_PORT 319 +#define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */ #define OFF_ETYPE 12 #define OFF_IHL 14 @@ -116,14 +117,20 @@ static inline int ptp_filter_init(struct sock_filter *f, int len) {OP_OR, 0, 0, PTP_CLASS_IPV6 }, /* */ \ {OP_RETA, 0, 0, 0 }, /* */ \ /*L3x*/ {OP_RETK, 0, 0, PTP_CLASS_NONE }, /* */ \ -/*L40*/ {OP_JEQ, 0, 6, ETH_P_8021Q }, /* f goto L50 */ \ +/*L40*/ {OP_JEQ, 0, 9, ETH_P_8021Q }, /* f goto L50 */ \ {OP_LDH, 0, 0, OFF_ETYPE + 4 }, /* */ \ - {OP_JEQ, 0, 9, ETH_P_1588 }, /* f goto L60 */ \ + {OP_JEQ, 0, 15, ETH_P_1588 }, /* f goto L60 */ \ + {OP_LDB, 0, 0, ETH_HLEN + VLAN_HLEN }, /* */ \ + {OP_AND, 0, 0, PTP_GEN_BIT }, /* */ \ + {OP_JEQ, 0, 12, 0 }, /* f goto L6x */ \ {OP_LDH, 0, 0, ETH_HLEN + VLAN_HLEN }, /* */ \ {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ {OP_OR, 0, 0, PTP_CLASS_VLAN }, /* */ \ {OP_RETA, 0, 0, 0 }, /* */ \ -/*L50*/ {OP_JEQ, 0, 4, ETH_P_1588 }, /* f goto L61 */ \ +/*L50*/ {OP_JEQ, 0, 7, ETH_P_1588 }, /* f goto L61 */ \ + {OP_LDB, 0, 0, ETH_HLEN }, /* */ \ + {OP_AND, 0, 0, PTP_GEN_BIT }, /* */ \ + {OP_JEQ, 0, 4, 0 }, /* f goto L6x */ \ {OP_LDH, 0, 0, ETH_HLEN }, /* */ \ {OP_AND, 0, 0, PTP_CLASS_VMASK }, /* */ \ {OP_OR, 0, 0, PTP_CLASS_L2 }, /* */ \ -- cgit v1.2.3-70-g09d2 From d670ec13178d0fd8680e6742a2bc6e04f28f87d8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 1 Sep 2011 12:42:04 +0200 Subject: posix-cpu-timers: Cure SMP wobbles David reported: Attached below is a watered-down version of rt/tst-cpuclock2.c from GLIBC. Just build it with "gcc -o test test.c -lpthread -lrt" or similar. Run it several times, and you will see cases where the main thread will measure a process clock difference before and after the nanosleep which is smaller than the cpu-burner thread's individual thread clock difference. This doesn't make any sense since the cpu-burner thread is part of the top-level process's thread group. I've reproduced this on both x86-64 and sparc64 (using both 32-bit and 64-bit binaries). For example: [davem@boricha build-x86_64-linux]$ ./test process: before(0.001221967) after(0.498624371) diff(497402404) thread: before(0.000081692) after(0.498316431) diff(498234739) self: before(0.001223521) after(0.001240219) diff(16698) [davem@boricha build-x86_64-linux]$ The diff of 'process' should always be >= the diff of 'thread'. I make sure to wrap the 'thread' clock measurements the most tightly around the nanosleep() call, and that the 'process' clock measurements are the outer-most ones. --- #include #include #include #include #include #include #include #include static pthread_barrier_t barrier; static void *chew_cpu(void *arg) { pthread_barrier_wait(&barrier); while (1) __asm__ __volatile__("" : : : "memory"); return NULL; } int main(void) { clockid_t process_clock, my_thread_clock, th_clock; struct timespec process_before, process_after; struct timespec me_before, me_after; struct timespec th_before, th_after; struct timespec sleeptime; unsigned long diff; pthread_t th; int err; err = clock_getcpuclockid(0, &process_clock); if (err) return 1; err = pthread_getcpuclockid(pthread_self(), &my_thread_clock); if (err) return 1; pthread_barrier_init(&barrier, NULL, 2); err = pthread_create(&th, NULL, chew_cpu, NULL); if (err) return 1; err = pthread_getcpuclockid(th, &th_clock); if (err) return 1; pthread_barrier_wait(&barrier); err = clock_gettime(process_clock, &process_before); if (err) return 1; err = clock_gettime(my_thread_clock, &me_before); if (err) return 1; err = clock_gettime(th_clock, &th_before); if (err) return 1; sleeptime.tv_sec = 0; sleeptime.tv_nsec = 500000000; nanosleep(&sleeptime, NULL); err = clock_gettime(th_clock, &th_after); if (err) return 1; err = clock_gettime(my_thread_clock, &me_after); if (err) return 1; err = clock_gettime(process_clock, &process_after); if (err) return 1; diff = process_after.tv_nsec - process_before.tv_nsec; printf("process: before(%lu.%.9lu) after(%lu.%.9lu) diff(%lu)\n", process_before.tv_sec, process_before.tv_nsec, process_after.tv_sec, process_after.tv_nsec, diff); diff = th_after.tv_nsec - th_before.tv_nsec; printf("thread: before(%lu.%.9lu) after(%lu.%.9lu) diff(%lu)\n", th_before.tv_sec, th_before.tv_nsec, th_after.tv_sec, th_after.tv_nsec, diff); diff = me_after.tv_nsec - me_before.tv_nsec; printf("self: before(%lu.%.9lu) after(%lu.%.9lu) diff(%lu)\n", me_before.tv_sec, me_before.tv_nsec, me_after.tv_sec, me_after.tv_nsec, diff); return 0; } This is due to us using p->se.sum_exec_runtime in thread_group_cputime() where we iterate the thread group and sum all data. This does not take time since the last schedule operation (tick or otherwise) into account. We can cure this by using task_sched_runtime() at the cost of having to take locks. This also means we can (and must) do away with thread_group_sched_runtime() since the modified thread_group_cputime() is now more accurate and would deadlock when called from thread_group_sched_runtime(). Aside of that it makes the function safe on 32 bit systems. The old code added t->se.sum_exec_runtime unprotected. sum_exec_runtime is a 64bit value and could be changed on another cpu at the same time. Reported-by: David Miller Signed-off-by: Peter Zijlstra Cc: stable@kernel.org Link: http://lkml.kernel.org/r/1314874459.7945.22.camel@twins Tested-by: David Miller Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 1 - kernel/posix-cpu-timers.c | 5 +++-- kernel/sched.c | 24 ------------------------ 3 files changed, 3 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4ac2c0578e0..41d0237fd44 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1956,7 +1956,6 @@ static inline void disable_sched_clock_irqtime(void) {} extern unsigned long long task_sched_runtime(struct task_struct *task); -extern unsigned long long thread_group_sched_runtime(struct task_struct *task); /* sched_exec is called by processes performing an exec */ #ifdef CONFIG_SMP diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c index 58f405b581e..c8008dd58ef 100644 --- a/kernel/posix-cpu-timers.c +++ b/kernel/posix-cpu-timers.c @@ -250,7 +250,7 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) do { times->utime = cputime_add(times->utime, t->utime); times->stime = cputime_add(times->stime, t->stime); - times->sum_exec_runtime += t->se.sum_exec_runtime; + times->sum_exec_runtime += task_sched_runtime(t); } while_each_thread(tsk, t); out: rcu_read_unlock(); @@ -312,7 +312,8 @@ static int cpu_clock_sample_group(const clockid_t which_clock, cpu->cpu = cputime.utime; break; case CPUCLOCK_SCHED: - cpu->sched = thread_group_sched_runtime(p); + thread_group_cputime(p, &cputime); + cpu->sched = cputime.sum_exec_runtime; break; } return 0; diff --git a/kernel/sched.c b/kernel/sched.c index d249ea88428..b50b0f0c9aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -3724,30 +3724,6 @@ unsigned long long task_sched_runtime(struct task_struct *p) return ns; } -/* - * Return sum_exec_runtime for the thread group. - * In case the task is currently running, return the sum plus current's - * pending runtime that have not been accounted yet. - * - * Note that the thread group might have other running tasks as well, - * so the return value not includes other pending runtime that other - * running tasks might have. - */ -unsigned long long thread_group_sched_runtime(struct task_struct *p) -{ - struct task_cputime totals; - unsigned long flags; - struct rq *rq; - u64 ns; - - rq = task_rq_lock(p, &flags); - thread_group_cputime(p, &totals); - ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); - task_rq_unlock(rq, p, &flags); - - return ns; -} - /* * Account user cpu time to a process. * @p: the process that the cpu time gets accounted to -- cgit v1.2.3-70-g09d2 From 5f39e6705faade2e89d119958a8c51b9b6e2c53c Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Mon, 3 Oct 2011 09:50:20 -0500 Subject: PCI: Disable MPS configuration by default Add the ability to disable PCI-E MPS turning and using the BIOS configured MPS defaults. Due to the number of issues recently discovered on some x86 chipsets, make this the default behavior. Also, add the option for peer to peer DMA MPS configuration. Peer to peer DMA is outside the scope of this patch, but MPS configuration could prevent it from working by having the MPS on one root port different than the MPS on another. To work around this, simply make the system wide MPS the smallest possible value (128B). Signed-off-by: Jon Mason Acked-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- drivers/pci/pci.c | 6 +++++- drivers/pci/probe.c | 14 +++++++++++++- include/linux/pci.h | 3 ++- 3 files changed, 20 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 4e84fd4a431..e9651f0a881 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -77,7 +77,7 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE; unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; -enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_SAFE; +enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_TUNE_OFF; /* * The default CLS is used if arch didn't set CLS explicitly and not @@ -3568,10 +3568,14 @@ static int __init pci_setup(char *str) pci_hotplug_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "hpmemsize=", 10)) { pci_hotplug_mem_size = memparse(str + 10, &str); + } else if (!strncmp(str, "pcie_bus_tune_off", 17)) { + pcie_bus_config = PCIE_BUS_TUNE_OFF; } else if (!strncmp(str, "pcie_bus_safe", 13)) { pcie_bus_config = PCIE_BUS_SAFE; } else if (!strncmp(str, "pcie_bus_perf", 13)) { pcie_bus_config = PCIE_BUS_PERFORMANCE; + } else if (!strncmp(str, "pcie_bus_peer2peer", 18)) { + pcie_bus_config = PCIE_BUS_PEER2PEER; } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f3f94a5c068..6ab6bd3df4b 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1458,12 +1458,24 @@ static int pcie_bus_configure_set(struct pci_dev *dev, void *data) */ void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss) { - u8 smpss = mpss; + u8 smpss; if (!pci_is_pcie(bus->self)) return; + if (pcie_bus_config == PCIE_BUS_TUNE_OFF) + return; + + /* FIXME - Peer to peer DMA is possible, though the endpoint would need + * to be aware to the MPS of the destination. To work around this, + * simply force the MPS of the entire system to the smallest possible. + */ + if (pcie_bus_config == PCIE_BUS_PEER2PEER) + smpss = 0; + if (pcie_bus_config == PCIE_BUS_SAFE) { + smpss = mpss; + pcie_find_smpss(bus->self, &smpss); pci_walk_bus(bus, pcie_find_smpss, &smpss); } diff --git a/include/linux/pci.h b/include/linux/pci.h index 8c230cbcbb4..9fc01226055 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -621,8 +621,9 @@ struct pci_driver { extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss); enum pcie_bus_config_types { - PCIE_BUS_PERFORMANCE, + PCIE_BUS_TUNE_OFF, PCIE_BUS_SAFE, + PCIE_BUS_PERFORMANCE, PCIE_BUS_PEER2PEER, }; -- cgit v1.2.3-70-g09d2