diff options
497 files changed, 14763 insertions, 6035 deletions
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index c078ad48f7a..8173cec473a 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt @@ -99,18 +99,11 @@ o "qp" indicates that RCU still expects a quiescent state from o "dt" is the current value of the dyntick counter that is incremented when entering or leaving dynticks idle state, either by the - scheduler or by irq. The number after the "/" is the interrupt - nesting depth when in dyntick-idle state, or one greater than - the interrupt-nesting depth otherwise. - - This field is displayed only for CONFIG_NO_HZ kernels. - -o "dn" is the current value of the dyntick counter that is incremented - when entering or leaving dynticks idle state via NMI. If both - the "dt" and "dn" values are even, then this CPU is in dynticks - idle mode and may be ignored by RCU. If either of these two - counters is odd, then RCU must be alert to the possibility of - an RCU read-side critical section running on this CPU. + scheduler or by irq. This number is even if the CPU is in + dyntick idle mode and odd otherwise. The number after the first + "/" is the interrupt nesting depth when in dyntick-idle state, + or one greater than the interrupt-nesting depth otherwise. + The number after the second "/" is the NMI nesting depth. This field is displayed only for CONFIG_NO_HZ kernels. diff --git a/Documentation/acpi/method-customizing.txt b/Documentation/acpi/method-customizing.txt index 3e1d25aee3f..5f55373dd53 100644 --- a/Documentation/acpi/method-customizing.txt +++ b/Documentation/acpi/method-customizing.txt @@ -66,3 +66,8 @@ Note: We can use a kernel with multiple custom ACPI method running, But each individual write to debugfs can implement a SINGLE method override. i.e. if we want to insert/override multiple ACPI methods, we need to redo step c) ~ g) for multiple times. 
+ +Note: Be aware that root can mis-use this driver to modify arbitrary + memory and gain additional rights, if root's privileges got + restricted (for example if root is not allowed to load additional + modules after boot). diff --git a/Documentation/dmaengine.txt b/Documentation/dmaengine.txt index 0c1c2f63c0a..5a0cb1ef616 100644 --- a/Documentation/dmaengine.txt +++ b/Documentation/dmaengine.txt @@ -1 +1,96 @@ -See Documentation/crypto/async-tx-api.txt + DMA Engine API Guide + ==================== + + Vinod Koul <vinod dot koul at intel.com> + +NOTE: For DMA Engine usage in async_tx please see: + Documentation/crypto/async-tx-api.txt + + +Below is a guide to device driver writers on how to use the Slave-DMA API of the +DMA Engine. This is applicable for slave DMA usage only. + +The slave DMA usage consists of the following steps +1. Allocate a DMA slave channel +2. Set slave and controller specific parameters +3. Get a descriptor for transaction +4. Submit the transaction and wait for callback notification + +1. Allocate a DMA slave channel +Channel allocation is slightly different in the slave DMA context, client +drivers typically need a channel from a particular DMA controller only and even +in some cases a specific channel is desired. To request a channel +dma_request_channel() API is used. + +Interface: +struct dma_chan *dma_request_channel(dma_cap_mask_t mask, + dma_filter_fn filter_fn, + void *filter_param); +where dma_filter_fn is defined as: +typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); + +When the optional 'filter_fn' parameter is set to NULL dma_request_channel +simply returns the first channel that satisfies the capability mask. Otherwise, +when the mask parameter is insufficient for specifying the necessary channel, +the filter_fn routine can be used to disposition the available channels in the +system. The filter_fn routine is called once for each free channel in the +system. 
Upon seeing a suitable channel filter_fn returns DMA_ACK which flags +that channel to be the return value from dma_request_channel. A channel +allocated via this interface is exclusive to the caller, until +dma_release_channel() is called. + +2. Set slave and controller specific parameters +Next step is always to pass some specific information to the DMA driver. Most of +the generic information which a slave DMA can use is in struct dma_slave_config. +It allows the clients to specify DMA direction, DMA addresses, bus widths, DMA +burst lengths etc. If some DMA controllers have more parameters to be sent then +they should try to embed struct dma_slave_config in their controller specific +structure. That gives flexibility to client to pass more parameters, if +required. + +Interface: +int dmaengine_slave_config(struct dma_chan *chan, + struct dma_slave_config *config) + +3. Get a descriptor for transaction +For slave usage the various modes of slave transfers supported by the +DMA-engine are: +slave_sg - DMA a list of scatter gather buffers from/to a peripheral +dma_cyclic - Perform a cyclic DMA operation from/to a peripheral till the + operation is explicitly stopped. +The non NULL return of this transfer API represents a "descriptor" for the given +transaction. + +Interface: +struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_sg)( + struct dma_chan *chan, + struct scatterlist *dst_sg, unsigned int dst_nents, + struct scatterlist *src_sg, unsigned int src_nents, + unsigned long flags); +struct dma_async_tx_descriptor *(*chan->device->device_prep_dma_cyclic)( + struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_data_direction direction); + +4. Submit the transaction and wait for callback notification +To have the transaction scheduled by the dma device, the "descriptor" +returned in step (3) above needs to be submitted. 
+To tell the dma driver that a transaction is ready to be serviced, the +descriptor->submit() callback needs to be invoked. This chains the descriptor to +the pending queue. +The transactions in the pending queue can be activated by calling the +issue_pending API. If channel is idle then the first transaction in queue is +started and subsequent ones queued up. +On completion of the DMA operation the next in queue is submitted and a tasklet +triggered. The tasklet would then call the client driver completion callback +routine for notification, if set. +Interface: +void dma_async_issue_pending(struct dma_chan *chan); + +============================================================================== + +Additional usage notes for dma driver writers +1/ Although DMA engine specifies that completion callback routines cannot submit +any new operations, for slave DMA the subsequent transaction typically may not +be available for submission before the callback routine is called. This restriction +does not apply to DMA-slave devices, but they should take care to drop +the spin-lock they might be holding before calling the callback routine. diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index ff31b1cc50a..1a9446b5915 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -6,6 +6,42 @@ be removed from this file. --------------------------- +What: x86 floppy disable_hlt +When: 2012 +Why: ancient workaround of dubious utility clutters the + code used by everybody else. +Who: Len Brown <len.brown@intel.com> + +--------------------------- + +What: CONFIG_APM_CPU_IDLE, and its ability to call APM BIOS in idle +When: 2012 +Why: This optional sub-feature of APM is of dubious reliability, + and ancient APM laptops are likely better served by calling HLT. + Deleting CONFIG_APM_CPU_IDLE allows x86 to stop exporting + the pm_idle function pointer to modules. 
+Who: Len Brown <len.brown@intel.com> + +---------------------------- + +What: x86_32 "no-hlt" cmdline param +When: 2012 +Why: remove a branch from idle path, simplify code used by everybody. + This option disabled the use of HLT in idle and machine_halt() + for hardware that was flakey 15-years ago. Today we have + "idle=poll" that removed HLT from idle, and so if such a machine + is still running the upstream kernel, "idle=poll" is likely sufficient. +Who: Len Brown <len.brown@intel.com> + +---------------------------- + +What: x86 "idle=mwait" cmdline param +When: 2012 +Why: simplify x86 idle code +Who: Len Brown <len.brown@intel.com> + +---------------------------- + What: PRISM54 When: 2.6.34 diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 61b31acb917..57d827d6071 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -104,7 +104,7 @@ of the locking scheme for directory operations. prototypes: struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); @@ -126,7 +126,7 @@ locking rules: s_umount alloc_inode: destroy_inode: -dirty_inode: (must not sleep) +dirty_inode: write_inode: drop_inode: !!!inode->i_lock!!! 
evict_inode: diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 21a7dc467bb..88b9f5519af 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -211,7 +211,7 @@ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, int); void (*drop_inode) (struct inode *); void (*delete_inode) (struct inode *); diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5438a2d7907..d9a203b058f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. With this option on every unmap_single operation will result in a hardware IOTLB flush operation as opposed to batching them for performance. - + sp_off [Default Off] + By default, super page will be supported if Intel IOMMU + has the capability. With this option, super page will + not be supported. intremap= [X86-64, Intel-IOMMU] Format: { on (default) | off | nosid } on enable Interrupt Remapping (default) diff --git a/Documentation/laptops/acer-wmi.txt b/Documentation/laptops/acer-wmi.txt deleted file mode 100644 index 4beafa663dd..00000000000 --- a/Documentation/laptops/acer-wmi.txt +++ /dev/null @@ -1,184 +0,0 @@ -Acer Laptop WMI Extras Driver -http://code.google.com/p/aceracpi -Version 0.3 -4th April 2009 - -Copyright 2007-2009 Carlos Corbacho <carlos@strangeworlds.co.uk> - -acer-wmi is a driver to allow you to control various parts of your Acer laptop -hardware under Linux which are exposed via ACPI-WMI. - -This driver completely replaces the old out-of-tree acer_acpi, which I am -currently maintaining for bug fixes only on pre-2.6.25 kernels. All development -work is now focused solely on acer-wmi. 
- -Disclaimer -********** - -Acer and Wistron have provided nothing towards the development acer_acpi or -acer-wmi. All information we have has been through the efforts of the developers -and the users to discover as much as possible about the hardware. - -As such, I do warn that this could break your hardware - this is extremely -unlikely of course, but please bear this in mind. - -Background -********** - -acer-wmi is derived from acer_acpi, originally developed by Mark -Smith in 2005, then taken over by Carlos Corbacho in 2007, in order to activate -the wireless LAN card under a 64-bit version of Linux, as acerhk[1] (the -previous solution to the problem) relied on making 32 bit BIOS calls which are -not possible in kernel space from a 64 bit OS. - -[1] acerhk: http://www.cakey.de/acerhk/ - -Supported Hardware -****************** - -NOTE: The Acer Aspire One is not supported hardware. It cannot work with -acer-wmi until Acer fix their ACPI-WMI implementation on them, so has been -blacklisted until that happens. - -Please see the website for the current list of known working hardware: - -http://code.google.com/p/aceracpi/wiki/SupportedHardware - -If your laptop is not listed, or listed as unknown, and works with acer-wmi, -please contact me with a copy of the DSDT. - -If your Acer laptop doesn't work with acer-wmi, I would also like to see the -DSDT. - -To send me the DSDT, as root/sudo: - -cat /sys/firmware/acpi/tables/DSDT > dsdt - -And send me the resulting 'dsdt' file. - -Usage -***** - -On Acer laptops, acer-wmi should already be autoloaded based on DMI matching. -For non-Acer laptops, until WMI based autoloading support is added, you will -need to manually load acer-wmi. 
- -acer-wmi creates /sys/devices/platform/acer-wmi, and fills it with various -files whose usage is detailed below, which enables you to control some of the -following (varies between models): - -* the wireless LAN card radio -* inbuilt Bluetooth adapter -* inbuilt 3G card -* mail LED of your laptop -* brightness of the LCD panel - -Wireless -******** - -With regards to wireless, all acer-wmi does is enable the radio on the card. It -is not responsible for the wireless LED - once the radio is enabled, this is -down to the wireless driver for your card. So the behaviour of the wireless LED, -once you enable the radio, will depend on your hardware and driver combination. - -e.g. With the BCM4318 on the Acer Aspire 5020 series: - -ndiswrapper: Light blinks on when transmitting -b43: Solid light, blinks off when transmitting - -Wireless radio control is unconditionally enabled - all Acer laptops that support -acer-wmi come with built-in wireless. However, should you feel so inclined to -ever wish to remove the card, or swap it out at some point, please get in touch -with me, as we may well be able to gain some data on wireless card detection. - -The wireless radio is exposed through rfkill. - -Bluetooth -********* - -For bluetooth, this is an internal USB dongle, so once enabled, you will get -a USB device connection event, and a new USB device appears. When you disable -bluetooth, you get the reverse - a USB device disconnect event, followed by the -device disappearing again. - -Bluetooth is autodetected by acer-wmi, so if you do not have a bluetooth module -installed in your laptop, this file won't exist (please be aware that it is -quite common for Acer not to fit bluetooth to their laptops - so just because -you have a bluetooth button on the laptop, doesn't mean that bluetooth is -installed). 
- -For the adventurously minded - if you want to buy an internal bluetooth -module off the internet that is compatible with your laptop and fit it, then -it will work just fine with acer-wmi. - -Bluetooth is exposed through rfkill. - -3G -** - -3G is currently not autodetected, so the 'threeg' file is always created under -sysfs. So far, no-one in possession of an Acer laptop with 3G built-in appears to -have tried Linux, or reported back, so we don't have any information on this. - -If you have an Acer laptop that does have a 3G card in, please contact me so we -can properly detect these, and find out a bit more about them. - -To read the status of the 3G card (0=off, 1=on): -cat /sys/devices/platform/acer-wmi/threeg - -To enable the 3G card: -echo 1 > /sys/devices/platform/acer-wmi/threeg - -To disable the 3G card: -echo 0 > /sys/devices/platform/acer-wmi/threeg - -To set the state of the 3G card when loading acer-wmi, pass: -threeg=X (where X is 0 or 1) - -Mail LED -******** - -This can be found in most older Acer laptops supported by acer-wmi, and many -newer ones - it is built into the 'mail' button, and blinks when active. - -On newer (WMID) laptops though, we have no way of detecting the mail LED. If -your laptop identifies itself in dmesg as a WMID model, then please try loading -acer_acpi with: - -force_series=2490 - -This will use a known alternative method of reading/ writing the mail LED. If -it works, please report back to me with the DMI data from your laptop so this -can be added to acer-wmi. - -The LED is exposed through the LED subsystem, and can be found in: - -/sys/devices/platform/acer-wmi/leds/acer-wmi::mail/ - -The mail LED is autodetected, so if you don't have one, the LED device won't -be registered. - -Backlight -********* - -The backlight brightness control is available on all acer-wmi supported -hardware. The maximum brightness level is usually 15, but on some newer laptops -it's 10 (this is again autodetected). 
- -The backlight is exposed through the backlight subsystem, and can be found in: - -/sys/devices/platform/acer-wmi/backlight/acer-wmi/ - -Credits -******* - -Olaf Tauber, who did the real hard work when he developed acerhk -http://www.cakey.de/acerhk/ -All the authors of laptop ACPI modules in the kernel, whose work -was an inspiration in the early days of acer_acpi -Mathieu Segaud, who solved the problem with having to modprobe the driver -twice in acer_acpi 0.2. -Jim Ramsay, who added support for the WMID interface -Mark Smith, who started the original acer_acpi - -And the many people who have used both acer_acpi and acer-wmi. diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt index 9c0a80d17a2..cef00d42ed5 100644 --- a/Documentation/lockstat.txt +++ b/Documentation/lockstat.txt @@ -12,8 +12,9 @@ Because things like lock contention can severely impact performance. - HOW Lockdep already has hooks in the lock functions and maps lock instances to -lock classes. We build on that. The graph below shows the relation between -the lock functions and the various hooks therein. +lock classes. We build on that (see Documentation/lockdep-design.txt). +The graph below shows the relation between the lock functions and the various +hooks therein. __acquire | @@ -128,6 +129,37 @@ points are the points we're contending with. The integer part of the time values is in us. +Dealing with nested locks, subclasses may appear: + +32............................................................................................................................................................................................... 
+33 +34 &rq->lock: 13128 13128 0.43 190.53 103881.26 97454 3453404 0.00 401.11 13224683.11 +35 --------- +36 &rq->lock 645 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75 +37 &rq->lock 297 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a +38 &rq->lock 360 [<ffffffff8103c4c5>] select_task_rq_fair+0x1f0/0x74a +39 &rq->lock 428 [<ffffffff81045f98>] scheduler_tick+0x46/0x1fb +40 --------- +41 &rq->lock 77 [<ffffffff8103bfc4>] task_rq_lock+0x43/0x75 +42 &rq->lock 174 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a +43 &rq->lock 4715 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54 +44 &rq->lock 893 [<ffffffff81340524>] schedule+0x157/0x7b8 +45 +46............................................................................................................................................................................................... +47 +48 &rq->lock/1: 11526 11488 0.33 388.73 136294.31 21461 38404 0.00 37.93 109388.53 +49 ----------- +50 &rq->lock/1 11526 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54 +51 ----------- +52 &rq->lock/1 5645 [<ffffffff8103ed4b>] double_rq_lock+0x42/0x54 +53 &rq->lock/1 1224 [<ffffffff81340524>] schedule+0x157/0x7b8 +54 &rq->lock/1 4336 [<ffffffff8103ed58>] double_rq_lock+0x4f/0x54 +55 &rq->lock/1 181 [<ffffffff8104ba65>] try_to_wake_up+0x127/0x25a + +Line 48 shows statistics for the second subclass (/1) of &rq->lock class +(subclass starts from 0), since in this case, as line 50 suggests, +double_rq_lock actually acquires a nested lock of two spinlocks. + View the top contending locks: # grep : /proc/lock_stat | head diff --git a/Documentation/virtual/lguest/Makefile b/Documentation/virtual/lguest/Makefile index bebac6b4f33..0ac34206f7a 100644 --- a/Documentation/virtual/lguest/Makefile +++ b/Documentation/virtual/lguest/Makefile @@ -1,5 +1,5 @@ # This creates the demonstration utility "lguest" which runs a Linux guest. -# Missing headers? Add "-I../../include -I../../arch/x86/include" +# Missing headers? 
Add "-I../../../include -I../../../arch/x86/include" CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE all: lguest diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c index d9da7e14853..cd9d6af61d0 100644 --- a/Documentation/virtual/lguest/lguest.c +++ b/Documentation/virtual/lguest/lguest.c @@ -49,7 +49,7 @@ #include <linux/virtio_rng.h> #include <linux/virtio_ring.h> #include <asm/bootparam.h> -#include "../../include/linux/lguest_launcher.h" +#include "../../../include/linux/lguest_launcher.h" /*L:110 * We can ignore the 42 include files we need for this program, but I do want * to draw attention to the use of kernel-style types. @@ -135,9 +135,6 @@ struct device { /* Is it operational */ bool running; - /* Does Guest want an intrrupt on empty? */ - bool irq_on_empty; - /* Device-specific data. */ void *priv; }; @@ -637,10 +634,7 @@ static void trigger_irq(struct virtqueue *vq) /* If they don't want an interrupt, don't send one... */ if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { - /* ... unless they've asked us to force one on empty. */ - if (!vq->dev->irq_on_empty - || lg_last_avail(vq) != vq->vring.avail->idx) - return; + return; } /* Send the Guest an interrupt tell them we used something up. 
*/ @@ -1057,15 +1051,6 @@ static void create_thread(struct virtqueue *vq) close(vq->eventfd); } -static bool accepted_feature(struct device *dev, unsigned int bit) -{ - const u8 *features = get_feature_bits(dev) + dev->feature_len; - - if (dev->feature_len < bit / CHAR_BIT) - return false; - return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT)); -} - static void start_device(struct device *dev) { unsigned int i; @@ -1079,8 +1064,6 @@ static void start_device(struct device *dev) verbose(" %02x", get_feature_bits(dev) [dev->feature_len+i]); - dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); - for (vq = dev->vq; vq; vq = vq->next) { if (vq->service) create_thread(vq); @@ -1564,7 +1547,6 @@ static void setup_tun_net(char *arg) /* Set up the tun device. */ configure_device(ipfd, tapif, ip); - add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); /* Expect Guest to handle everything except UFO */ add_feature(dev, VIRTIO_NET_F_CSUM); add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); diff --git a/MAINTAINERS b/MAINTAINERS index b9f5aee3637..29801f760b6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -223,10 +223,8 @@ S: Maintained F: drivers/platform/x86/acerhdf.c ACER WMI LAPTOP EXTRAS -M: Carlos Corbacho <carlos@strangeworlds.co.uk> -L: aceracpi@googlegroups.com (subscribers-only) +M: Joey Lee <jlee@novell.com> L: platform-driver-x86@vger.kernel.org -W: http://code.google.com/p/aceracpi S: Maintained F: drivers/platform/x86/acer-wmi.c @@ -271,10 +269,8 @@ S: Supported F: drivers/acpi/video.c ACPI WMI DRIVER -M: Carlos Corbacho <carlos@strangeworlds.co.uk> L: platform-driver-x86@vger.kernel.org -W: http://www.lesswatts.org/projects/acpi/ -S: Maintained +S: Orphan F: drivers/platform/x86/wmi.c AD1889 ALSA SOUND DRIVER @@ -2178,6 +2174,8 @@ M: Dan Williams <dan.j.williams@intel.com> S: Supported F: drivers/dma/ F: include/linux/dma* +T: git git://git.kernel.org/pub/scm/linux/kernel/git/djbw/async_tx.git +T: git git://git.infradead.org/users/vkoul/slave-dma.git 
(slave-dma) DME1737 HARDWARE MONITOR DRIVER M: Juerg Haefliger <juergh@gmail.com> @@ -3031,9 +3029,8 @@ S: Maintained F: drivers/net/wireless/hostap/ HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER -M: Carlos Corbacho <carlos@strangeworlds.co.uk> L: platform-driver-x86@vger.kernel.org -S: Odd Fixes +S: Orphan F: drivers/platform/x86/tc1100-wmi.c HP100: Driver for HP 10/100 Mbit/s Voice Grade Network Adapter Series @@ -5451,6 +5448,13 @@ L: linux-serial@vger.kernel.org S: Maintained F: drivers/tty/serial +SYNOPSYS DESIGNWARE DMAC DRIVER +M: Viresh Kumar <viresh.kumar@st.com> +S: Maintained +F: include/linux/dw_dmac.h +F: drivers/dma/dw_dmac_regs.h +F: drivers/dma/dw_dmac.c + TIMEKEEPING, NTP M: John Stultz <johnstul@us.ibm.com> M: Thomas Gleixner <tglx@linutronix.de> @@ -1,8 +1,8 @@ -VERSION = 2 -PATCHLEVEL = 6 -SUBLEVEL = 39 -EXTRAVERSION = -NAME = Flesh-Eating Bats with Fangs +VERSION = 3 +PATCHLEVEL = 0 +SUBLEVEL = 0 +EXTRAVERSION = -rc1 +NAME = Sneaky Weasel # *DOCUMENTATION* # To see a list of typical targets execute "make help" diff --git a/arch/alpha/include/asm/unistd.h b/arch/alpha/include/asm/unistd.h index b1834166922..4ac48a095f3 100644 --- a/arch/alpha/include/asm/unistd.h +++ b/arch/alpha/include/asm/unistd.h @@ -456,10 +456,11 @@ #define __NR_open_by_handle_at 498 #define __NR_clock_adjtime 499 #define __NR_syncfs 500 +#define __NR_setns 501 #ifdef __KERNEL__ -#define NR_SYSCALLS 501 +#define NR_SYSCALLS 502 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index 15f999d41c7..b9c28f3f195 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -519,6 +519,7 @@ sys_call_table: .quad sys_open_by_handle_at .quad sys_clock_adjtime .quad sys_syncfs /* 500 */ + .quad sys_setns .size sys_call_table, . 
- sys_call_table .type sys_call_table, @object diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 3de689aa6f6..2c04ed5efeb 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -401,6 +401,7 @@ #define __NR_clock_adjtime (__NR_SYSCALL_BASE+372) #define __NR_syncfs (__NR_SYSCALL_BASE+373) #define __NR_sendmmsg (__NR_SYSCALL_BASE+374) +#define __NR_setns (__NR_SYSCALL_BASE+375) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 24cdac3ce2e..80f7896cc01 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -384,6 +384,7 @@ CALL(sys_clock_adjtime) CALL(sys_syncfs) CALL(sys_sendmmsg) +/* 375 */ CALL(sys_setns) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/arm/mach-exynos4/Makefile b/arch/arm/mach-exynos4/Makefile index 683fc387c8a..a9bb94fabaa 100644 --- a/arch/arm/mach-exynos4/Makefile +++ b/arch/arm/mach-exynos4/Makefile @@ -13,7 +13,7 @@ obj- := # Core support for EXYNOS4 system obj-$(CONFIG_CPU_EXYNOS4210) += cpu.o init.o clock.o irq-combiner.o -obj-$(CONFIG_CPU_EXYNOS4210) += setup-i2c0.o gpiolib.o irq-eint.o dma.o +obj-$(CONFIG_CPU_EXYNOS4210) += setup-i2c0.o irq-eint.o dma.o obj-$(CONFIG_PM) += pm.o sleep.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_CPU_IDLE) += cpuidle.o diff --git a/arch/arm/mach-nomadik/Kconfig b/arch/arm/mach-nomadik/Kconfig index 71f3ea62397..3c5e0f522e9 100644 --- a/arch/arm/mach-nomadik/Kconfig +++ b/arch/arm/mach-nomadik/Kconfig @@ -6,7 +6,6 @@ config MACH_NOMADIK_8815NHK bool "ST 8815 Nomadik Hardware Kit (evaluation board)" select NOMADIK_8815 select HAS_MTU - select NOMADIK_GPIO endmenu diff --git a/arch/arm/mach-s5pc100/Makefile b/arch/arm/mach-s5pc100/Makefile index eecab57d2e5..a5e6e608b49 100644 --- a/arch/arm/mach-s5pc100/Makefile +++ b/arch/arm/mach-s5pc100/Makefile @@ -11,7 +11,7 @@ obj- := # Core support for 
S5PC100 system -obj-$(CONFIG_CPU_S5PC100) += cpu.o init.o clock.o gpiolib.o +obj-$(CONFIG_CPU_S5PC100) += cpu.o init.o clock.o obj-$(CONFIG_CPU_S5PC100) += setup-i2c0.o obj-$(CONFIG_CPU_S5PC100) += dma.o diff --git a/arch/arm/mach-s5pv210/Makefile b/arch/arm/mach-s5pv210/Makefile index 11f17907b4e..50907aca006 100644 --- a/arch/arm/mach-s5pv210/Makefile +++ b/arch/arm/mach-s5pv210/Makefile @@ -12,7 +12,7 @@ obj- := # Core support for S5PV210 system -obj-$(CONFIG_CPU_S5PV210) += cpu.o init.o clock.o dma.o gpiolib.o +obj-$(CONFIG_CPU_S5PV210) += cpu.o init.o clock.o dma.o obj-$(CONFIG_CPU_S5PV210) += setup-i2c0.o obj-$(CONFIG_S5PV210_PM) += pm.o sleep.o obj-$(CONFIG_CPU_FREQ) += cpufreq.o diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 08acb6ec813..f6b687f61c2 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -249,6 +249,29 @@ static int slot_cn7_get_cd(struct platform_device *pdev) { return !gpio_get_value(GPIO_PORT41); } +/* MERAM */ +static struct sh_mobile_meram_info meram_info = { + .addr_mode = SH_MOBILE_MERAM_MODE1, +}; + +static struct resource meram_resources[] = { + [0] = { + .name = "MERAM", + .start = 0xe8000000, + .end = 0xe81fffff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device meram_device = { + .name = "sh_mobile_meram", + .id = 0, + .num_resources = ARRAY_SIZE(meram_resources), + .resource = meram_resources, + .dev = { + .platform_data = &meram_info, + }, +}; /* SH_MMCIF */ static struct resource sh_mmcif_resources[] = { @@ -447,13 +470,29 @@ const static struct fb_videomode ap4evb_lcdc_modes[] = { #endif }, }; +static struct sh_mobile_meram_cfg lcd_meram_cfg = { + .icb[0] = { + .marker_icb = 28, + .cache_icb = 24, + .meram_offset = 0x0, + .meram_size = 0x40, + }, + .icb[1] = { + .marker_icb = 29, + .cache_icb = 25, + .meram_offset = 0x40, + .meram_size = 0x40, + }, +}; static struct sh_mobile_lcdc_info lcdc_info = { + .meram_dev = 
&meram_info, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .lcd_cfg = ap4evb_lcdc_modes, .num_cfg = ARRAY_SIZE(ap4evb_lcdc_modes), + .meram_cfg = &lcd_meram_cfg, } }; @@ -724,15 +763,31 @@ static struct platform_device fsi_device = { static struct platform_device fsi_ak4643_device = { .name = "sh_fsi2_a_ak4643", }; +static struct sh_mobile_meram_cfg hdmi_meram_cfg = { + .icb[0] = { + .marker_icb = 30, + .cache_icb = 26, + .meram_offset = 0x80, + .meram_size = 0x100, + }, + .icb[1] = { + .marker_icb = 31, + .cache_icb = 27, + .meram_offset = 0x180, + .meram_size = 0x100, + }, +}; static struct sh_mobile_lcdc_info sh_mobile_lcdc1_info = { .clock_source = LCDC_CLK_EXTERNAL, + .meram_dev = &meram_info, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, .bpp = 16, .interface_type = RGB24, .clock_divider = 1, .flags = LCDC_FLAGS_DWPOL, + .meram_cfg = &hdmi_meram_cfg, } }; @@ -961,6 +1016,7 @@ static struct platform_device *ap4evb_devices[] __initdata = { &csi2_device, &ceu_device, &ap4evb_camera, + &meram_device, }; static void __init hdmi_init_pm_clock(void) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index 448ddbe4333..776f20560e7 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -39,6 +39,7 @@ #include <linux/mtd/mtd.h> #include <linux/mtd/partitions.h> #include <linux/mtd/physmap.h> +#include <linux/pm_runtime.h> #include <linux/smsc911x.h> #include <linux/sh_intc.h> #include <linux/tca6416_keypad.h> @@ -314,6 +315,30 @@ static struct platform_device smc911x_device = { }, }; +/* MERAM */ +static struct sh_mobile_meram_info mackerel_meram_info = { + .addr_mode = SH_MOBILE_MERAM_MODE1, +}; + +static struct resource meram_resources[] = { + [0] = { + .name = "MERAM", + .start = 0xe8000000, + .end = 0xe81fffff, + .flags = IORESOURCE_MEM, + }, +}; + +static struct platform_device meram_device = { + .name = "sh_mobile_meram", + .id = 0, + .num_resources = 
ARRAY_SIZE(meram_resources), + .resource = meram_resources, + .dev = { + .platform_data = &mackerel_meram_info, + }, +}; + /* LCDC */ static struct fb_videomode mackerel_lcdc_modes[] = { { @@ -342,7 +367,23 @@ static int mackerel_get_brightness(void *board_data) return gpio_get_value(GPIO_PORT31); } +static struct sh_mobile_meram_cfg lcd_meram_cfg = { + .icb[0] = { + .marker_icb = 28, + .cache_icb = 24, + .meram_offset = 0x0, + .meram_size = 0x40, + }, + .icb[1] = { + .marker_icb = 29, + .cache_icb = 25, + .meram_offset = 0x40, + .meram_size = 0x40, + }, +}; + static struct sh_mobile_lcdc_info lcdc_info = { + .meram_dev = &mackerel_meram_info, .clock_source = LCDC_CLK_BUS, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, @@ -362,6 +403,7 @@ static struct sh_mobile_lcdc_info lcdc_info = { .name = "sh_mobile_lcdc_bl", .max_brightness = 1, }, + .meram_cfg = &lcd_meram_cfg, } }; @@ -388,8 +430,23 @@ static struct platform_device lcdc_device = { }, }; +static struct sh_mobile_meram_cfg hdmi_meram_cfg = { + .icb[0] = { + .marker_icb = 30, + .cache_icb = 26, + .meram_offset = 0x80, + .meram_size = 0x100, + }, + .icb[1] = { + .marker_icb = 31, + .cache_icb = 27, + .meram_offset = 0x180, + .meram_size = 0x100, + }, +}; /* HDMI */ static struct sh_mobile_lcdc_info hdmi_lcdc_info = { + .meram_dev = &mackerel_meram_info, .clock_source = LCDC_CLK_EXTERNAL, .ch[0] = { .chan = LCDC_CHAN_MAINLCD, @@ -397,6 +454,7 @@ static struct sh_mobile_lcdc_info hdmi_lcdc_info = { .interface_type = RGB24, .clock_divider = 1, .flags = LCDC_FLAGS_DWPOL, + .meram_cfg = &hdmi_meram_cfg, } }; @@ -856,6 +914,17 @@ static int slot_cn7_get_cd(struct platform_device *pdev) } /* SDHI0 */ +static irqreturn_t mackerel_sdhi0_gpio_cd(int irq, void *arg) +{ + struct device *dev = arg; + struct sh_mobile_sdhi_info *info = dev->platform_data; + struct tmio_mmc_data *pdata = info->pdata; + + tmio_mmc_cd_wakeup(pdata); + + return IRQ_HANDLED; +} + static struct sh_mobile_sdhi_info sdhi0_info = { .dma_slave_tx = 
SHDMA_SLAVE_SDHI0_TX, .dma_slave_rx = SHDMA_SLAVE_SDHI0_RX, @@ -1150,6 +1219,7 @@ static struct platform_device *mackerel_devices[] __initdata = { &mackerel_camera, &hdmi_lcdc_device, &hdmi_device, + &meram_device, }; /* Keypad Initialization */ @@ -1238,6 +1308,7 @@ static void __init mackerel_init(void) { u32 srcr4; struct clk *clk; + int ret; sh7372_pinmux_init(); @@ -1343,6 +1414,13 @@ static void __init mackerel_init(void) gpio_request(GPIO_FN_SDHID0_1, NULL); gpio_request(GPIO_FN_SDHID0_0, NULL); + ret = request_irq(evt2irq(0x3340), mackerel_sdhi0_gpio_cd, + IRQF_TRIGGER_FALLING, "sdhi0 cd", &sdhi0_device.dev); + if (!ret) + sdhi0_info.tmio_flags |= TMIO_MMC_HAS_COLD_CD; + else + pr_err("Cannot get IRQ #%d: %d\n", evt2irq(0x3340), ret); + #if !defined(CONFIG_MMC_SH_MMCIF) && !defined(CONFIG_MMC_SH_MMCIF_MODULE) /* enable SDHI1 */ gpio_request(GPIO_FN_SDHICMD1, NULL); diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index d17eb66f4ac..c0800d83971 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -509,6 +509,7 @@ enum { MSTP001, MSTP118, MSTP117, MSTP116, MSTP113, MSTP106, MSTP101, MSTP100, MSTP223, + MSTP218, MSTP217, MSTP216, MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, MSTP329, MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312, MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP406, MSTP403, @@ -534,6 +535,9 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP101] = MSTP(&div4_clks[DIV4_M1], SMSTPCR1, 1, 0), /* VPU */ [MSTP100] = MSTP(&div4_clks[DIV4_B], SMSTPCR1, 0, 0), /* LCDC0 */ [MSTP223] = MSTP(&div6_clks[DIV6_SPU], SMSTPCR2, 23, 0), /* SPU2 */ + [MSTP218] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 18, 0), /* DMAC1 */ + [MSTP217] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 17, 0), /* DMAC2 */ + [MSTP216] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 16, 0), /* DMAC3 */ [MSTP207] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 7, 0), /* SCIFA5 */ [MSTP206] = 
MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 6, 0), /* SCIFB */ [MSTP204] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 4, 0), /* SCIFA0 */ @@ -626,6 +630,9 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh_mobile_lcdc_fb.0", &mstp_clks[MSTP100]), /* LCDC0 */ CLKDEV_DEV_ID("uio_pdrv_genirq.6", &mstp_clks[MSTP223]), /* SPU2DSP0 */ CLKDEV_DEV_ID("uio_pdrv_genirq.7", &mstp_clks[MSTP223]), /* SPU2DSP1 */ + CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP218]), /* DMAC1 */ + CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[MSTP217]), /* DMAC2 */ + CLKDEV_DEV_ID("sh-dma-engine.2", &mstp_clks[MSTP216]), /* DMAC3 */ CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */ CLKDEV_DEV_ID("sh-sci.6", &mstp_clks[MSTP206]), /* SCIFB */ CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP204]), /* SCIFA0 */ diff --git a/arch/arm/mach-u300/Makefile b/arch/arm/mach-u300/Makefile index fab46fe9a71..8fd354aaf0a 100644 --- a/arch/arm/mach-u300/Makefile +++ b/arch/arm/mach-u300/Makefile @@ -2,7 +2,7 @@ # Makefile for the linux kernel, U300 machine. # -obj-y := core.o clock.o timer.o gpio.o padmux.o +obj-y := core.o clock.o timer.o padmux.o obj-m := obj-n := obj- := diff --git a/arch/arm/mach-ux500/Kconfig b/arch/arm/mach-ux500/Kconfig index 54429d01595..f8b9392ee34 100644 --- a/arch/arm/mach-ux500/Kconfig +++ b/arch/arm/mach-ux500/Kconfig @@ -5,7 +5,6 @@ config UX500_SOC_COMMON default y select ARM_GIC select HAS_MTU - select NOMADIK_GPIO select ARM_ERRATA_753970 menu "Ux500 SoC" diff --git a/arch/arm/plat-nomadik/Kconfig b/arch/arm/plat-nomadik/Kconfig index 18296ee6880..ce659015535 100644 --- a/arch/arm/plat-nomadik/Kconfig +++ b/arch/arm/plat-nomadik/Kconfig @@ -21,9 +21,4 @@ config HAS_MTU to multiple interrupt generating programmable 32-bit free running decrementing counters. -config NOMADIK_GPIO - bool - help - Support for the Nomadik GPIO controller. 
- endif diff --git a/arch/arm/plat-nomadik/Makefile b/arch/arm/plat-nomadik/Makefile index c33547361bd..37c7cdd0f8f 100644 --- a/arch/arm/plat-nomadik/Makefile +++ b/arch/arm/plat-nomadik/Makefile @@ -3,4 +3,3 @@ # Licensed under GPLv2 obj-$(CONFIG_HAS_MTU) += timer.o -obj-$(CONFIG_NOMADIK_GPIO) += gpio.o diff --git a/arch/arm/plat-nomadik/include/plat/gpio.h b/arch/arm/plat-nomadik/include/plat/gpio.h index 1b9f6f0843d..ea19a5b2f22 100644 --- a/arch/arm/plat-nomadik/include/plat/gpio.h +++ b/arch/arm/plat-nomadik/include/plat/gpio.h @@ -78,6 +78,8 @@ extern int nmk_gpio_get_mode(int gpio); extern void nmk_gpio_wakeups_suspend(void); extern void nmk_gpio_wakeups_resume(void); +extern void nmk_gpio_read_pull(int gpio_bank, u32 *pull_up); + /* * Platform data to register a block: only the initial gpio/irq number. */ diff --git a/arch/arm/plat-omap/Makefile b/arch/arm/plat-omap/Makefile index a4a12859fdd..f0233e6abcd 100644 --- a/arch/arm/plat-omap/Makefile +++ b/arch/arm/plat-omap/Makefile @@ -3,7 +3,7 @@ # # Common support -obj-y := common.o sram.o clock.o devices.o dma.o mux.o gpio.o \ +obj-y := common.o sram.o clock.o devices.o dma.o mux.o \ usb.o fb.o io.o counter_32k.o obj-m := obj-n := diff --git a/arch/arm/plat-omap/include/plat/gpio.h b/arch/arm/plat-omap/include/plat/gpio.h index cac2e8ac696..ec97e00cb58 100644 --- a/arch/arm/plat-omap/include/plat/gpio.h +++ b/arch/arm/plat-omap/include/plat/gpio.h @@ -52,6 +52,109 @@ #define OMAP34XX_NR_GPIOS 6 +/* + * OMAP1510 GPIO registers + */ +#define OMAP1510_GPIO_DATA_INPUT 0x00 +#define OMAP1510_GPIO_DATA_OUTPUT 0x04 +#define OMAP1510_GPIO_DIR_CONTROL 0x08 +#define OMAP1510_GPIO_INT_CONTROL 0x0c +#define OMAP1510_GPIO_INT_MASK 0x10 +#define OMAP1510_GPIO_INT_STATUS 0x14 +#define OMAP1510_GPIO_PIN_CONTROL 0x18 + +#define OMAP1510_IH_GPIO_BASE 64 + +/* + * OMAP1610 specific GPIO registers + */ +#define OMAP1610_GPIO_REVISION 0x0000 +#define OMAP1610_GPIO_SYSCONFIG 0x0010 +#define OMAP1610_GPIO_SYSSTATUS 0x0014 
+#define OMAP1610_GPIO_IRQSTATUS1 0x0018 +#define OMAP1610_GPIO_IRQENABLE1 0x001c +#define OMAP1610_GPIO_WAKEUPENABLE 0x0028 +#define OMAP1610_GPIO_DATAIN 0x002c +#define OMAP1610_GPIO_DATAOUT 0x0030 +#define OMAP1610_GPIO_DIRECTION 0x0034 +#define OMAP1610_GPIO_EDGE_CTRL1 0x0038 +#define OMAP1610_GPIO_EDGE_CTRL2 0x003c +#define OMAP1610_GPIO_CLEAR_IRQENABLE1 0x009c +#define OMAP1610_GPIO_CLEAR_WAKEUPENA 0x00a8 +#define OMAP1610_GPIO_CLEAR_DATAOUT 0x00b0 +#define OMAP1610_GPIO_SET_IRQENABLE1 0x00dc +#define OMAP1610_GPIO_SET_WAKEUPENA 0x00e8 +#define OMAP1610_GPIO_SET_DATAOUT 0x00f0 + +/* + * OMAP7XX specific GPIO registers + */ +#define OMAP7XX_GPIO_DATA_INPUT 0x00 +#define OMAP7XX_GPIO_DATA_OUTPUT 0x04 +#define OMAP7XX_GPIO_DIR_CONTROL 0x08 +#define OMAP7XX_GPIO_INT_CONTROL 0x0c +#define OMAP7XX_GPIO_INT_MASK 0x10 +#define OMAP7XX_GPIO_INT_STATUS 0x14 + +/* + * omap2+ specific GPIO registers + */ +#define OMAP24XX_GPIO_REVISION 0x0000 +#define OMAP24XX_GPIO_IRQSTATUS1 0x0018 +#define OMAP24XX_GPIO_IRQSTATUS2 0x0028 +#define OMAP24XX_GPIO_IRQENABLE2 0x002c +#define OMAP24XX_GPIO_IRQENABLE1 0x001c +#define OMAP24XX_GPIO_WAKE_EN 0x0020 +#define OMAP24XX_GPIO_CTRL 0x0030 +#define OMAP24XX_GPIO_OE 0x0034 +#define OMAP24XX_GPIO_DATAIN 0x0038 +#define OMAP24XX_GPIO_DATAOUT 0x003c +#define OMAP24XX_GPIO_LEVELDETECT0 0x0040 +#define OMAP24XX_GPIO_LEVELDETECT1 0x0044 +#define OMAP24XX_GPIO_RISINGDETECT 0x0048 +#define OMAP24XX_GPIO_FALLINGDETECT 0x004c +#define OMAP24XX_GPIO_DEBOUNCE_EN 0x0050 +#define OMAP24XX_GPIO_DEBOUNCE_VAL 0x0054 +#define OMAP24XX_GPIO_CLEARIRQENABLE1 0x0060 +#define OMAP24XX_GPIO_SETIRQENABLE1 0x0064 +#define OMAP24XX_GPIO_CLEARWKUENA 0x0080 +#define OMAP24XX_GPIO_SETWKUENA 0x0084 +#define OMAP24XX_GPIO_CLEARDATAOUT 0x0090 +#define OMAP24XX_GPIO_SETDATAOUT 0x0094 + +#define OMAP4_GPIO_REVISION 0x0000 +#define OMAP4_GPIO_EOI 0x0020 +#define OMAP4_GPIO_IRQSTATUSRAW0 0x0024 +#define OMAP4_GPIO_IRQSTATUSRAW1 0x0028 +#define OMAP4_GPIO_IRQSTATUS0 0x002c 
+#define OMAP4_GPIO_IRQSTATUS1 0x0030 +#define OMAP4_GPIO_IRQSTATUSSET0 0x0034 +#define OMAP4_GPIO_IRQSTATUSSET1 0x0038 +#define OMAP4_GPIO_IRQSTATUSCLR0 0x003c +#define OMAP4_GPIO_IRQSTATUSCLR1 0x0040 +#define OMAP4_GPIO_IRQWAKEN0 0x0044 +#define OMAP4_GPIO_IRQWAKEN1 0x0048 +#define OMAP4_GPIO_IRQENABLE1 0x011c +#define OMAP4_GPIO_WAKE_EN 0x0120 +#define OMAP4_GPIO_IRQSTATUS2 0x0128 +#define OMAP4_GPIO_IRQENABLE2 0x012c +#define OMAP4_GPIO_CTRL 0x0130 +#define OMAP4_GPIO_OE 0x0134 +#define OMAP4_GPIO_DATAIN 0x0138 +#define OMAP4_GPIO_DATAOUT 0x013c +#define OMAP4_GPIO_LEVELDETECT0 0x0140 +#define OMAP4_GPIO_LEVELDETECT1 0x0144 +#define OMAP4_GPIO_RISINGDETECT 0x0148 +#define OMAP4_GPIO_FALLINGDETECT 0x014c +#define OMAP4_GPIO_DEBOUNCENABLE 0x0150 +#define OMAP4_GPIO_DEBOUNCINGTIME 0x0154 +#define OMAP4_GPIO_CLEARIRQENABLE1 0x0160 +#define OMAP4_GPIO_SETIRQENABLE1 0x0164 +#define OMAP4_GPIO_CLEARWKUENA 0x0180 +#define OMAP4_GPIO_SETWKUENA 0x0184 +#define OMAP4_GPIO_CLEARDATAOUT 0x0190 +#define OMAP4_GPIO_SETDATAOUT 0x0194 + #define OMAP_MPUIO(nr) (OMAP_MAX_GPIO_LINES + (nr)) #define OMAP_GPIO_IS_MPUIO(nr) ((nr) >= OMAP_MAX_GPIO_LINES) diff --git a/arch/arm/plat-samsung/Makefile b/arch/arm/plat-samsung/Makefile index e9de58a2e29..53eb15b0a07 100644 --- a/arch/arm/plat-samsung/Makefile +++ b/arch/arm/plat-samsung/Makefile @@ -19,7 +19,6 @@ obj-y += gpio.o obj-y += gpio-config.o obj-y += dev-asocdma.o -obj-$(CONFIG_SAMSUNG_GPIOLIB_4BIT) += gpiolib.o obj-$(CONFIG_SAMSUNG_CLKSRC) += clock-clksrc.o obj-$(CONFIG_SAMSUNG_IRQ_UART) += irq-uart.o diff --git a/arch/avr32/include/asm/unistd.h b/arch/avr32/include/asm/unistd.h index 89861a27543..f714544e556 100644 --- a/arch/avr32/include/asm/unistd.h +++ b/arch/avr32/include/asm/unistd.h @@ -299,9 +299,10 @@ #define __NR_signalfd 279 /* 280 was __NR_timerfd */ #define __NR_eventfd 281 +#define __NR_setns 283 #ifdef __KERNEL__ -#define NR_syscalls 282 +#define NR_syscalls 284 /* Old stuff */ #define __IGNORE_uselib diff --git 
a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S index e76bad16b0f..c7fd394d28a 100644 --- a/arch/avr32/kernel/syscall_table.S +++ b/arch/avr32/kernel/syscall_table.S @@ -296,4 +296,5 @@ sys_call_table: .long sys_ni_syscall /* 280, was sys_timerfd */ .long sys_eventfd .long sys_recvmmsg + .long sys_setns .long sys_ni_syscall /* r8 is saturated at nr_syscalls */ diff --git a/arch/blackfin/include/asm/bfin_serial.h b/arch/blackfin/include/asm/bfin_serial.h index 7dbc664eab1..7fd0ec7b5b0 100644 --- a/arch/blackfin/include/asm/bfin_serial.h +++ b/arch/blackfin/include/asm/bfin_serial.h @@ -184,7 +184,7 @@ struct bfin_uart_regs { #undef __BFP #ifndef port_membase -# define port_membase(p) (((struct bfin_serial_port *)(p))->port.membase) +# define port_membase(p) 0 #endif #define UART_GET_CHAR(p) bfin_read16(port_membase(p) + OFFSET_RBR) @@ -235,10 +235,10 @@ struct bfin_uart_regs { #define UART_SET_DLAB(p) do { UART_PUT_LCR(p, UART_GET_LCR(p) | DLAB); SSYNC(); } while (0) #ifndef put_lsr_cache -# define put_lsr_cache(p, v) (((struct bfin_serial_port *)(p))->lsr = (v)) +# define put_lsr_cache(p, v) #endif #ifndef get_lsr_cache -# define get_lsr_cache(p) (((struct bfin_serial_port *)(p))->lsr) +# define get_lsr_cache(p) 0 #endif /* The hardware clears the LSR bits upon read, so we need to cache diff --git a/arch/blackfin/include/asm/gptimers.h b/arch/blackfin/include/asm/gptimers.h index c722acdda0d..38657dac123 100644 --- a/arch/blackfin/include/asm/gptimers.h +++ b/arch/blackfin/include/asm/gptimers.h @@ -193,4 +193,22 @@ uint16_t get_enabled_gptimers(void); uint32_t get_gptimer_status(unsigned int group); void set_gptimer_status(unsigned int group, uint32_t value); +/* + * All Blackfin system MMRs are padded to 32bits even if the register + * itself is only 16bits. So use a helper macro to streamline this. 
+ */ +#define __BFP(m) u16 m; u16 __pad_##m + +/* + * bfin timer registers layout + */ +struct bfin_gptimer_regs { + __BFP(config); + u32 counter; + u32 period; + u32 width; +}; + +#undef __BFP + #endif diff --git a/arch/blackfin/include/asm/unistd.h b/arch/blackfin/include/asm/unistd.h index ff9a9f35d50..0ccba60b9cc 100644 --- a/arch/blackfin/include/asm/unistd.h +++ b/arch/blackfin/include/asm/unistd.h @@ -397,8 +397,10 @@ #define __NR_open_by_handle_at 376 #define __NR_clock_adjtime 377 #define __NR_syncfs 378 +#define __NR_setns 379 +#define __NR_sendmmsg 380 -#define __NR_syscall 379 +#define __NR_syscall 381 #define NR_syscalls __NR_syscall /* Old optional stuff no one actually uses */ diff --git a/arch/blackfin/kernel/debug-mmrs.c b/arch/blackfin/kernel/debug-mmrs.c index 94b1d8a0256..fce4807ceef 100644 --- a/arch/blackfin/kernel/debug-mmrs.c +++ b/arch/blackfin/kernel/debug-mmrs.c @@ -13,6 +13,7 @@ #include <asm/blackfin.h> #include <asm/gpio.h> +#include <asm/gptimers.h> #include <asm/bfin_can.h> #include <asm/bfin_dma.h> #include <asm/bfin_ppi.h> @@ -230,8 +231,8 @@ bfin_debug_mmrs_dma(struct dentry *parent, unsigned long base, int num, char mdm #define DMA(num) _DMA(num, DMA##num##_NEXT_DESC_PTR, 0, "") #define _MDMA(num, x) \ do { \ - _DMA(num, x##DMA_D##num##_CONFIG, 'D', #x); \ - _DMA(num, x##DMA_S##num##_CONFIG, 'S', #x); \ + _DMA(num, x##DMA_D##num##_NEXT_DESC_PTR, 'D', #x); \ + _DMA(num, x##DMA_S##num##_NEXT_DESC_PTR, 'S', #x); \ } while (0) #define MDMA(num) _MDMA(num, M) #define IMDMA(num) _MDMA(num, IM) @@ -264,20 +265,15 @@ bfin_debug_mmrs_eppi(struct dentry *parent, unsigned long base, int num) /* * General Purpose Timers */ -#define GPTIMER_OFF(mmr) (TIMER0_##mmr - TIMER0_CONFIG) -#define __GPTIMER(name) \ - do { \ - strcpy(_buf, #name); \ - debugfs_create_x16(buf, S_IRUSR|S_IWUSR, parent, (u16 *)(base + GPTIMER_OFF(name))); \ - } while (0) +#define __GPTIMER(uname, lname) __REGS(gptimer, #uname, lname) static void __init __maybe_unused 
bfin_debug_mmrs_gptimer(struct dentry *parent, unsigned long base, int num) { char buf[32], *_buf = REGS_STR_PFX(buf, TIMER, num); - __GPTIMER(CONFIG); - __GPTIMER(COUNTER); - __GPTIMER(PERIOD); - __GPTIMER(WIDTH); + __GPTIMER(CONFIG, config); + __GPTIMER(COUNTER, counter); + __GPTIMER(PERIOD, period); + __GPTIMER(WIDTH, width); } #define GPTIMER(num) bfin_debug_mmrs_gptimer(parent, TIMER##num##_CONFIG, num) @@ -355,7 +351,7 @@ bfin_debug_mmrs_ppi(struct dentry *parent, unsigned long base, int num) __PPI(DELAY, delay); __PPI(FRAME, frame); } -#define PPI(num) bfin_debug_mmrs_ppi(parent, PPI##num##_STATUS, num) +#define PPI(num) bfin_debug_mmrs_ppi(parent, PPI##num##_CONTROL, num) /* * SPI @@ -1288,15 +1284,15 @@ static int __init bfin_debug_mmrs_init(void) D16(VR_CTL); D32(CHIPID); /* it's part of this hardware block */ -#if defined(PPI_STATUS) || defined(PPI0_STATUS) || defined(PPI1_STATUS) +#if defined(PPI_CONTROL) || defined(PPI0_CONTROL) || defined(PPI1_CONTROL) parent = debugfs_create_dir("ppi", top); -# ifdef PPI_STATUS - bfin_debug_mmrs_ppi(parent, PPI_STATUS, -1); +# ifdef PPI_CONTROL + bfin_debug_mmrs_ppi(parent, PPI_CONTROL, -1); # endif -# ifdef PPI0_STATUS +# ifdef PPI0_CONTROL PPI(0); # endif -# ifdef PPI1_STATUS +# ifdef PPI1_CONTROL PPI(1); # endif #endif @@ -1341,6 +1337,10 @@ static int __init bfin_debug_mmrs_init(void) D16(RSI_PID1); D16(RSI_PID2); D16(RSI_PID3); + D16(RSI_PID4); + D16(RSI_PID5); + D16(RSI_PID6); + D16(RSI_PID7); D16(RSI_PWR_CONTROL); D16(RSI_RD_WAIT_EN); D32(RSI_RESPONSE0); diff --git a/arch/blackfin/lib/strncpy.S b/arch/blackfin/lib/strncpy.S index f3931d50b4a..2c07dddac99 100644 --- a/arch/blackfin/lib/strncpy.S +++ b/arch/blackfin/lib/strncpy.S @@ -25,7 +25,7 @@ ENTRY(_strncpy) CC = R2 == 0; - if CC JUMP 4f; + if CC JUMP 6f; P2 = R2 ; /* size */ P0 = R0 ; /* dst*/ diff --git a/arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h deleted file mode 100644 index 
f6d924ac0c4..00000000000 --- a/arch/blackfin/mach-bf518/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2008-2009 Analog Devices Inc. - * - * Licensed under the GPL-2 or later - */ - -#include <asm/dma.h> -#include <asm/portmux.h> - -#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) -# define CONFIG_SERIAL_BFIN_CTSRTS - -# ifndef CONFIG_UART0_CTS_PIN -# define CONFIG_UART0_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART0_RTS_PIN -# define CONFIG_UART0_RTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_CTS_PIN -# define CONFIG_UART1_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_RTS_PIN -# define CONFIG_UART1_RTS_PIN -1 -# endif -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { -#ifdef CONFIG_SERIAL_BFIN_UART0 - { - 0xFFC00400, - IRQ_UART0_RX, - IRQ_UART0_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART0_TX, - CH_UART0_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART0_CTS_PIN, - CONFIG_UART0_RTS_PIN, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART1 - { - 0xFFC02000, - IRQ_UART1_RX, - IRQ_UART1_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART1_TX, - CH_UART1_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART1_CTS_PIN, - CONFIG_UART1_RTS_PIN, -#endif - }, -#endif -}; - -#define DRIVER_NAME "bfin-uart" - -#include <asm/bfin_serial.h> diff --git a/arch/blackfin/mach-bf518/include/mach/defBF514.h b/arch/blackfin/mach-bf518/include/mach/defBF514.h index 98a51c47929..cfab428e577 100644 --- a/arch/blackfin/mach-bf518/include/mach/defBF514.h +++ b/arch/blackfin/mach-bf518/include/mach/defBF514.h @@ -36,13 +36,13 @@ #define RSI_EMASK 0xFFC038C4 /* RSI Exception Mask Register */ #define RSI_CONFIG 0xFFC038C8 
/* RSI Configuration Register */ #define RSI_RD_WAIT_EN 0xFFC038CC /* RSI Read Wait Enable Register */ -#define RSI_PID0 0xFFC03FE0 /* RSI Peripheral ID Register 0 */ -#define RSI_PID1 0xFFC03FE4 /* RSI Peripheral ID Register 1 */ -#define RSI_PID2 0xFFC03FE8 /* RSI Peripheral ID Register 2 */ -#define RSI_PID3 0xFFC03FEC /* RSI Peripheral ID Register 3 */ -#define RSI_PID4 0xFFC03FF0 /* RSI Peripheral ID Register 4 */ -#define RSI_PID5 0xFFC03FF4 /* RSI Peripheral ID Register 5 */ -#define RSI_PID6 0xFFC03FF8 /* RSI Peripheral ID Register 6 */ -#define RSI_PID7 0xFFC03FFC /* RSI Peripheral ID Register 7 */ +#define RSI_PID0 0xFFC038D0 /* RSI Peripheral ID Register 0 */ +#define RSI_PID1 0xFFC038D4 /* RSI Peripheral ID Register 1 */ +#define RSI_PID2 0xFFC038D8 /* RSI Peripheral ID Register 2 */ +#define RSI_PID3 0xFFC038DC /* RSI Peripheral ID Register 3 */ +#define RSI_PID4 0xFFC038E0 /* RSI Peripheral ID Register 0 */ +#define RSI_PID5 0xFFC038E4 /* RSI Peripheral ID Register 1 */ +#define RSI_PID6 0xFFC038E8 /* RSI Peripheral ID Register 2 */ +#define RSI_PID7 0xFFC038EC /* RSI Peripheral ID Register 3 */ #endif /* _DEF_BF514_H */ diff --git a/arch/blackfin/mach-bf527/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf527/include/mach/bfin_serial_5xx.h deleted file mode 100644 index 960e08919de..00000000000 --- a/arch/blackfin/mach-bf527/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2007-2009 Analog Devices Inc. 
- * - * Licensed under the GPL-2 or later - */ - -#include <asm/dma.h> -#include <asm/portmux.h> - -#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) -# define CONFIG_SERIAL_BFIN_CTSRTS - -# ifndef CONFIG_UART0_CTS_PIN -# define CONFIG_UART0_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART0_RTS_PIN -# define CONFIG_UART0_RTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_CTS_PIN -# define CONFIG_UART1_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_RTS_PIN -# define CONFIG_UART1_RTS_PIN -1 -# endif -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { -#ifdef CONFIG_SERIAL_BFIN_UART0 - { - 0xFFC00400, - IRQ_UART0_RX, - IRQ_UART0_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART0_TX, - CH_UART0_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART0_CTS_PIN, - CONFIG_UART0_RTS_PIN, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART1 - { - 0xFFC02000, - IRQ_UART1_RX, - IRQ_UART1_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART1_TX, - CH_UART1_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART1_CTS_PIN, - CONFIG_UART1_RTS_PIN, -#endif - }, -#endif -}; - -#define DRIVER_NAME "bfin-uart" - -#include <asm/bfin_serial.h> diff --git a/arch/blackfin/mach-bf527/include/mach/defBF525.h b/arch/blackfin/mach-bf527/include/mach/defBF525.h index cc383adfdff..aab80bb1a68 100644 --- a/arch/blackfin/mach-bf527/include/mach/defBF525.h +++ b/arch/blackfin/mach-bf527/include/mach/defBF525.h @@ -185,8 +185,8 @@ #define USB_EP_NI7_TXTYPE 0xffc03bd4 /* Sets the transaction protocol and peripheral endpoint number for the Host Tx endpoint7 */ #define USB_EP_NI7_TXINTERVAL 0xffc03bd8 /* Sets the NAK response timeout on Endpoint7 */ #define USB_EP_NI7_RXTYPE 0xffc03bdc /* 
Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint7 */ -#define USB_EP_NI7_RXINTERVAL 0xffc03bf0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint7 */ -#define USB_EP_NI7_TXCOUNT 0xffc03bf8 /* Number of bytes to be written to the endpoint7 Tx FIFO */ +#define USB_EP_NI7_RXINTERVAL 0xffc03be0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint7 */ +#define USB_EP_NI7_TXCOUNT 0xffc03be8 /* Number of bytes to be written to the endpoint7 Tx FIFO */ #define USB_DMA_INTERRUPT 0xffc03c00 /* Indicates pending interrupts for the DMA channels */ diff --git a/arch/blackfin/mach-bf533/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf533/include/mach/bfin_serial_5xx.h deleted file mode 100644 index 45dcaa4f3e4..00000000000 --- a/arch/blackfin/mach-bf533/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2006-2009 Analog Devices Inc. 
- * - * Licensed under the GPL-2 or later - */ - -#include <asm/dma.h> -#include <asm/portmux.h> - -#ifdef CONFIG_BFIN_UART0_CTSRTS -# define CONFIG_SERIAL_BFIN_CTSRTS -# ifndef CONFIG_UART0_CTS_PIN -# define CONFIG_UART0_CTS_PIN -1 -# endif -# ifndef CONFIG_UART0_RTS_PIN -# define CONFIG_UART0_RTS_PIN -1 -# endif -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { - { - 0xFFC00400, - IRQ_UART0_RX, - IRQ_UART0_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART0_TX, - CH_UART0_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART0_CTS_PIN, - CONFIG_UART0_RTS_PIN, -#endif - } -}; - -#define DRIVER_NAME "bfin-uart" - -#include <asm/bfin_serial.h> diff --git a/arch/blackfin/mach-bf537/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf537/include/mach/bfin_serial_5xx.h deleted file mode 100644 index 3e955dba895..00000000000 --- a/arch/blackfin/mach-bf537/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2006-2009 Analog Devices Inc. 
- * - * Licensed under the GPL-2 or later - */ - -#include <asm/dma.h> -#include <asm/portmux.h> - -#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) -# define CONFIG_SERIAL_BFIN_CTSRTS - -# ifndef CONFIG_UART0_CTS_PIN -# define CONFIG_UART0_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART0_RTS_PIN -# define CONFIG_UART0_RTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_CTS_PIN -# define CONFIG_UART1_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_RTS_PIN -# define CONFIG_UART1_RTS_PIN -1 -# endif -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { -#ifdef CONFIG_SERIAL_BFIN_UART0 - { - 0xFFC00400, - IRQ_UART0_RX, - IRQ_UART0_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART0_TX, - CH_UART0_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART0_CTS_PIN, - CONFIG_UART0_RTS_PIN, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART1 - { - 0xFFC02000, - IRQ_UART1_RX, - IRQ_UART1_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART1_TX, - CH_UART1_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART1_CTS_PIN, - CONFIG_UART1_RTS_PIN, -#endif - }, -#endif -}; - -#define DRIVER_NAME "bfin-uart" - -#include <asm/bfin_serial.h> diff --git a/arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h deleted file mode 100644 index beb502e9cb3..00000000000 --- a/arch/blackfin/mach-bf538/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2008-2009 Analog Devices Inc. - * - * Licensed under the GPL-2 or later. 
- */ - -#include <asm/dma.h> -#include <asm/portmux.h> - -#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) -# define CONFIG_SERIAL_BFIN_CTSRTS - -# ifndef CONFIG_UART0_CTS_PIN -# define CONFIG_UART0_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART0_RTS_PIN -# define CONFIG_UART0_RTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_CTS_PIN -# define CONFIG_UART1_CTS_PIN -1 -# endif - -# ifndef CONFIG_UART1_RTS_PIN -# define CONFIG_UART1_RTS_PIN -1 -# endif -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { -#ifdef CONFIG_SERIAL_BFIN_UART0 - { - 0xFFC00400, - IRQ_UART0_RX, - IRQ_UART0_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART0_TX, - CH_UART0_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART0_CTS_PIN, - CONFIG_UART0_RTS_PIN, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART1 - { - 0xFFC02000, - IRQ_UART1_RX, - IRQ_UART1_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART1_TX, - CH_UART1_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART1_CTS_PIN, - CONFIG_UART1_RTS_PIN, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART2 - { - 0xFFC02100, - IRQ_UART2_RX, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART2_TX, - CH_UART2_RX, -#endif -#ifdef CONFIG_BFIN_UART2_CTSRTS - CONFIG_UART2_CTS_PIN, - CONFIG_UART2_RTS_PIN, -#endif - }, -#endif -}; - -#define DRIVER_NAME "bfin-uart" - -#include <asm/bfin_serial.h> diff --git a/arch/blackfin/mach-bf548/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf548/include/mach/bfin_serial_5xx.h deleted file mode 100644 index 0d94edaaaa2..00000000000 --- a/arch/blackfin/mach-bf548/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2007-2009 Analog Devices Inc. 
- * - * Licensed under the GPL-2 or later. - */ - -#include <asm/dma.h> -#include <asm/portmux.h> - -#if defined(CONFIG_BFIN_UART0_CTSRTS) || defined(CONFIG_BFIN_UART1_CTSRTS) || \ - defined(CONFIG_BFIN_UART2_CTSRTS) || defined(CONFIG_BFIN_UART3_CTSRTS) -# define CONFIG_SERIAL_BFIN_HARD_CTSRTS -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { -#ifdef CONFIG_SERIAL_BFIN_UART0 - { - 0xFFC00400, - IRQ_UART0_RX, - IRQ_UART0_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART0_TX, - CH_UART0_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS - 0, - 0, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART1 - { - 0xFFC02000, - IRQ_UART1_RX, - IRQ_UART1_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART1_TX, - CH_UART1_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS - GPIO_PE10, - GPIO_PE9, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART2 - { - 0xFFC02100, - IRQ_UART2_RX, - IRQ_UART2_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART2_TX, - CH_UART2_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS - 0, - 0, -#endif - }, -#endif -#ifdef CONFIG_SERIAL_BFIN_UART3 - { - 0xFFC03100, - IRQ_UART3_RX, - IRQ_UART3_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART3_TX, - CH_UART3_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_HARD_CTSRTS - GPIO_PB3, - GPIO_PB2, -#endif - }, -#endif -}; - -#define DRIVER_NAME "bfin-uart" - -#include <asm/bfin_serial.h> diff --git a/arch/blackfin/mach-bf548/include/mach/defBF547.h b/arch/blackfin/mach-bf548/include/mach/defBF547.h index 1cbba115f96..1fa41ec03f3 100644 --- a/arch/blackfin/mach-bf548/include/mach/defBF547.h +++ b/arch/blackfin/mach-bf548/include/mach/defBF547.h @@ -271,10 +271,10 @@ #define USB_EP_NI0_TXINTERVAL 0xffc03e18 /* Sets the NAK 
response timeout on Endpoint 0 */ #define USB_EP_NI0_RXTYPE 0xffc03e1c /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint0 */ #define USB_EP_NI0_RXINTERVAL 0xffc03e20 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint0 */ +#define USB_EP_NI0_TXCOUNT 0xffc03e28 /* Number of bytes to be written to the endpoint0 Tx FIFO */ /* USB Endpoint 1 Control Registers */ -#define USB_EP_NI0_TXCOUNT 0xffc03e28 /* Number of bytes to be written to the endpoint0 Tx FIFO */ #define USB_EP_NI1_TXMAXP 0xffc03e40 /* Maximum packet size for Host Tx endpoint1 */ #define USB_EP_NI1_TXCSR 0xffc03e44 /* Control Status register for endpoint1 */ #define USB_EP_NI1_RXMAXP 0xffc03e48 /* Maximum packet size for Host Rx endpoint1 */ @@ -284,10 +284,10 @@ #define USB_EP_NI1_TXINTERVAL 0xffc03e58 /* Sets the NAK response timeout on Endpoint1 */ #define USB_EP_NI1_RXTYPE 0xffc03e5c /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint1 */ #define USB_EP_NI1_RXINTERVAL 0xffc03e60 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint1 */ +#define USB_EP_NI1_TXCOUNT 0xffc03e68 /* Number of bytes to be written to the+H102 endpoint1 Tx FIFO */ /* USB Endpoint 2 Control Registers */ -#define USB_EP_NI1_TXCOUNT 0xffc03e68 /* Number of bytes to be written to the+H102 endpoint1 Tx FIFO */ #define USB_EP_NI2_TXMAXP 0xffc03e80 /* Maximum packet size for Host Tx endpoint2 */ #define USB_EP_NI2_TXCSR 0xffc03e84 /* Control Status register for endpoint2 */ #define USB_EP_NI2_RXMAXP 0xffc03e88 /* Maximum packet size for Host Rx endpoint2 */ @@ -297,10 +297,10 @@ #define USB_EP_NI2_TXINTERVAL 0xffc03e98 /* Sets the NAK response timeout on Endpoint2 */ #define USB_EP_NI2_RXTYPE 0xffc03e9c /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint2 */ #define 
USB_EP_NI2_RXINTERVAL 0xffc03ea0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint2 */ +#define USB_EP_NI2_TXCOUNT 0xffc03ea8 /* Number of bytes to be written to the endpoint2 Tx FIFO */ /* USB Endpoint 3 Control Registers */ -#define USB_EP_NI2_TXCOUNT 0xffc03ea8 /* Number of bytes to be written to the endpoint2 Tx FIFO */ #define USB_EP_NI3_TXMAXP 0xffc03ec0 /* Maximum packet size for Host Tx endpoint3 */ #define USB_EP_NI3_TXCSR 0xffc03ec4 /* Control Status register for endpoint3 */ #define USB_EP_NI3_RXMAXP 0xffc03ec8 /* Maximum packet size for Host Rx endpoint3 */ @@ -310,10 +310,10 @@ #define USB_EP_NI3_TXINTERVAL 0xffc03ed8 /* Sets the NAK response timeout on Endpoint3 */ #define USB_EP_NI3_RXTYPE 0xffc03edc /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint3 */ #define USB_EP_NI3_RXINTERVAL 0xffc03ee0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint3 */ +#define USB_EP_NI3_TXCOUNT 0xffc03ee8 /* Number of bytes to be written to the H124endpoint3 Tx FIFO */ /* USB Endpoint 4 Control Registers */ -#define USB_EP_NI3_TXCOUNT 0xffc03ee8 /* Number of bytes to be written to the H124endpoint3 Tx FIFO */ #define USB_EP_NI4_TXMAXP 0xffc03f00 /* Maximum packet size for Host Tx endpoint4 */ #define USB_EP_NI4_TXCSR 0xffc03f04 /* Control Status register for endpoint4 */ #define USB_EP_NI4_RXMAXP 0xffc03f08 /* Maximum packet size for Host Rx endpoint4 */ @@ -323,10 +323,10 @@ #define USB_EP_NI4_TXINTERVAL 0xffc03f18 /* Sets the NAK response timeout on Endpoint4 */ #define USB_EP_NI4_RXTYPE 0xffc03f1c /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint4 */ #define USB_EP_NI4_RXINTERVAL 0xffc03f20 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint4 */ 
+#define USB_EP_NI4_TXCOUNT 0xffc03f28 /* Number of bytes to be written to the endpoint4 Tx FIFO */ /* USB Endpoint 5 Control Registers */ -#define USB_EP_NI4_TXCOUNT 0xffc03f28 /* Number of bytes to be written to the endpoint4 Tx FIFO */ #define USB_EP_NI5_TXMAXP 0xffc03f40 /* Maximum packet size for Host Tx endpoint5 */ #define USB_EP_NI5_TXCSR 0xffc03f44 /* Control Status register for endpoint5 */ #define USB_EP_NI5_RXMAXP 0xffc03f48 /* Maximum packet size for Host Rx endpoint5 */ @@ -336,10 +336,10 @@ #define USB_EP_NI5_TXINTERVAL 0xffc03f58 /* Sets the NAK response timeout on Endpoint5 */ #define USB_EP_NI5_RXTYPE 0xffc03f5c /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint5 */ #define USB_EP_NI5_RXINTERVAL 0xffc03f60 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint5 */ +#define USB_EP_NI5_TXCOUNT 0xffc03f68 /* Number of bytes to be written to the H145endpoint5 Tx FIFO */ /* USB Endpoint 6 Control Registers */ -#define USB_EP_NI5_TXCOUNT 0xffc03f68 /* Number of bytes to be written to the H145endpoint5 Tx FIFO */ #define USB_EP_NI6_TXMAXP 0xffc03f80 /* Maximum packet size for Host Tx endpoint6 */ #define USB_EP_NI6_TXCSR 0xffc03f84 /* Control Status register for endpoint6 */ #define USB_EP_NI6_RXMAXP 0xffc03f88 /* Maximum packet size for Host Rx endpoint6 */ @@ -349,10 +349,10 @@ #define USB_EP_NI6_TXINTERVAL 0xffc03f98 /* Sets the NAK response timeout on Endpoint6 */ #define USB_EP_NI6_RXTYPE 0xffc03f9c /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint6 */ #define USB_EP_NI6_RXINTERVAL 0xffc03fa0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint6 */ +#define USB_EP_NI6_TXCOUNT 0xffc03fa8 /* Number of bytes to be written to the endpoint6 Tx FIFO */ /* USB Endpoint 7 Control Registers */ -#define USB_EP_NI6_TXCOUNT 
0xffc03fa8 /* Number of bytes to be written to the endpoint6 Tx FIFO */ #define USB_EP_NI7_TXMAXP 0xffc03fc0 /* Maximum packet size for Host Tx endpoint7 */ #define USB_EP_NI7_TXCSR 0xffc03fc4 /* Control Status register for endpoint7 */ #define USB_EP_NI7_RXMAXP 0xffc03fc8 /* Maximum packet size for Host Rx endpoint7 */ @@ -361,8 +361,9 @@ #define USB_EP_NI7_TXTYPE 0xffc03fd4 /* Sets the transaction protocol and peripheral endpoint number for the Host Tx endpoint7 */ #define USB_EP_NI7_TXINTERVAL 0xffc03fd8 /* Sets the NAK response timeout on Endpoint7 */ #define USB_EP_NI7_RXTYPE 0xffc03fdc /* Sets the transaction protocol and peripheral endpoint number for the Host Rx endpoint7 */ -#define USB_EP_NI7_RXINTERVAL 0xffc03ff0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint7 */ -#define USB_EP_NI7_TXCOUNT 0xffc03ff8 /* Number of bytes to be written to the endpoint7 Tx FIFO */ +#define USB_EP_NI7_RXINTERVAL 0xffc03fe0 /* Sets the polling interval for Interrupt/Isochronous transfers or the NAK response timeout on Bulk transfers for Host Rx endpoint7 */ +#define USB_EP_NI7_TXCOUNT 0xffc03fe8 /* Number of bytes to be written to the endpoint7 Tx FIFO */ + #define USB_DMA_INTERRUPT 0xffc04000 /* Indicates pending interrupts for the DMA channels */ /* USB Channel 0 Config Registers */ diff --git a/arch/blackfin/mach-bf561/include/mach/bfin_serial_5xx.h b/arch/blackfin/mach-bf561/include/mach/bfin_serial_5xx.h deleted file mode 100644 index 3a6947456cf..00000000000 --- a/arch/blackfin/mach-bf561/include/mach/bfin_serial_5xx.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2006-2009 Analog Devices Inc. - * - * Licensed under the GPL-2 or later. 
- */ - -#include <asm/dma.h> -#include <asm/portmux.h> - -#ifdef CONFIG_BFIN_UART0_CTSRTS -# define CONFIG_SERIAL_BFIN_CTSRTS -# ifndef CONFIG_UART0_CTS_PIN -# define CONFIG_UART0_CTS_PIN -1 -# endif -# ifndef CONFIG_UART0_RTS_PIN -# define CONFIG_UART0_RTS_PIN -1 -# endif -#endif - -struct bfin_serial_res { - unsigned long uart_base_addr; - int uart_irq; - int uart_status_irq; -#ifdef CONFIG_SERIAL_BFIN_DMA - unsigned int uart_tx_dma_channel; - unsigned int uart_rx_dma_channel; -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - int uart_cts_pin; - int uart_rts_pin; -#endif -}; - -struct bfin_serial_res bfin_serial_resource[] = { - { - 0xFFC00400, - IRQ_UART_RX, - IRQ_UART_ERROR, -#ifdef CONFIG_SERIAL_BFIN_DMA - CH_UART_TX, - CH_UART_RX, -#endif -#ifdef CONFIG_SERIAL_BFIN_CTSRTS - CONFIG_UART0_CTS_PIN, - CONFIG_UART0_RTS_PIN, -#endif - } -}; - -#define DRIVER_NAME "bfin-uart" - -#include <asm/bfin_serial.h> diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S index f96933f48a7..225d311c970 100644 --- a/arch/blackfin/mach-common/entry.S +++ b/arch/blackfin/mach-common/entry.S @@ -1753,6 +1753,8 @@ ENTRY(_sys_call_table) .long _sys_open_by_handle_at .long _sys_clock_adjtime .long _sys_syncfs + .long _sys_setns + .long _sys_sendmmsg /* 380 */ .rept NR_syscalls-(.-_sys_call_table)/4 .long _sys_ni_syscall diff --git a/arch/blackfin/mm/maccess.c b/arch/blackfin/mm/maccess.c index b71cebc1f8a..e2532114c5f 100644 --- a/arch/blackfin/mm/maccess.c +++ b/arch/blackfin/mm/maccess.c @@ -16,7 +16,7 @@ static int validate_memory_access_address(unsigned long addr, int size) return bfin_mem_access_type(addr, size); } -long probe_kernel_read(void *dst, void *src, size_t size) +long probe_kernel_read(void *dst, const void *src, size_t size) { unsigned long lsrc = (unsigned long)src; int mem_type; @@ -55,7 +55,7 @@ long probe_kernel_read(void *dst, void *src, size_t size) return -EFAULT; } -long probe_kernel_write(void *dst, void *src, size_t size) +long 
probe_kernel_write(void *dst, const void *src, size_t size) { unsigned long ldst = (unsigned long)dst; int mem_type; diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S index 0d6420d087f..1161883eb58 100644 --- a/arch/cris/arch-v10/kernel/entry.S +++ b/arch/cris/arch-v10/kernel/entry.S @@ -937,6 +937,7 @@ sys_call_table: .long sys_inotify_init1 .long sys_preadv .long sys_pwritev + .long sys_setns /* 335 */ /* * NOTE!! This doesn't have to be exact - we just have diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S index 3abf12c23e5..84fed7e91ad 100644 --- a/arch/cris/arch-v32/kernel/entry.S +++ b/arch/cris/arch-v32/kernel/entry.S @@ -880,6 +880,7 @@ sys_call_table: .long sys_inotify_init1 .long sys_preadv .long sys_pwritev + .long sys_setns /* 335 */ /* * NOTE!! This doesn't have to be exact - we just have diff --git a/arch/cris/include/asm/unistd.h b/arch/cris/include/asm/unistd.h index f6fad83b3a8..f921b8b0f97 100644 --- a/arch/cris/include/asm/unistd.h +++ b/arch/cris/include/asm/unistd.h @@ -339,10 +339,11 @@ #define __NR_inotify_init1 332 #define __NR_preadv 333 #define __NR_pwritev 334 +#define __NR_setns 335 #ifdef __KERNEL__ -#define NR_syscalls 335 +#define NR_syscalls 336 #include <arch/unistd.h> diff --git a/arch/frv/include/asm/unistd.h b/arch/frv/include/asm/unistd.h index b28da499e22..a569dff7cd5 100644 --- a/arch/frv/include/asm/unistd.h +++ b/arch/frv/include/asm/unistd.h @@ -343,10 +343,11 @@ #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 #define __NR_perf_event_open 336 +#define __NR_setns 337 #ifdef __KERNEL__ -#define NR_syscalls 337 +#define NR_syscalls 338 #define __ARCH_WANT_IPC_PARSE_VERSION /* #define __ARCH_WANT_OLD_READDIR */ diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S index 63d579bf1c2..017d6d7b784 100644 --- a/arch/frv/kernel/entry.S +++ b/arch/frv/kernel/entry.S @@ -1526,5 +1526,6 @@ sys_call_table: .long sys_pwritev .long sys_rt_tgsigqueueinfo 
/* 335 */ .long sys_perf_event_open + .long sys_setns syscall_table_size = (. - sys_call_table) diff --git a/arch/h8300/include/asm/unistd.h b/arch/h8300/include/asm/unistd.h index 50f2c5a3659..2c3f8e60b1e 100644 --- a/arch/h8300/include/asm/unistd.h +++ b/arch/h8300/include/asm/unistd.h @@ -325,10 +325,11 @@ #define __NR_move_pages 317 #define __NR_getcpu 318 #define __NR_epoll_pwait 319 +#define __NR_setns 320 #ifdef __KERNEL__ -#define NR_syscalls 320 +#define NR_syscalls 321 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S index faefaff7d43..f4b2e67bcc3 100644 --- a/arch/h8300/kernel/syscalls.S +++ b/arch/h8300/kernel/syscalls.S @@ -333,6 +333,7 @@ SYMBOL_NAME_LABEL(sys_call_table) .long SYMBOL_NAME(sys_ni_syscall) /* sys_move_pages */ .long SYMBOL_NAME(sys_getcpu) .long SYMBOL_NAME(sys_ni_syscall) /* sys_epoll_pwait */ + .long SYMBOL_NAME(sys_setns) /* 320 */ .macro call_sp addr mov.l #SYMBOL_NAME(\addr),er6 diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 404d037c5e1..7c928da35b1 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -319,11 +319,13 @@ #define __NR_open_by_handle_at 1327 #define __NR_clock_adjtime 1328 #define __NR_syncfs 1329 +#define __NR_setns 1330 +#define __NR_sendmmsg 1331 #ifdef __KERNEL__ -#define NR_syscalls 306 /* length of syscall table */ +#define NR_syscalls 308 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 6de2e23b363..97dd2abdeb1 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1775,6 +1775,8 @@ sys_call_table: data8 sys_open_by_handle_at data8 sys_clock_adjtime data8 sys_syncfs + data8 sys_setns // 1330 + data8 sys_sendmmsg .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif 
/* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/m32r/include/asm/unistd.h b/arch/m32r/include/asm/unistd.h index c70545689da..3e1db561aac 100644 --- a/arch/m32r/include/asm/unistd.h +++ b/arch/m32r/include/asm/unistd.h @@ -330,10 +330,11 @@ /* #define __NR_timerfd 322 removed */ #define __NR_eventfd 323 #define __NR_fallocate 324 +#define __NR_setns 325 #ifdef __KERNEL__ -#define NR_syscalls 325 +#define NR_syscalls 326 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_STAT64 diff --git a/arch/m32r/kernel/syscall_table.S b/arch/m32r/kernel/syscall_table.S index 60536e27123..528f2e6ad06 100644 --- a/arch/m32r/kernel/syscall_table.S +++ b/arch/m32r/kernel/syscall_table.S @@ -324,3 +324,4 @@ ENTRY(sys_call_table) .long sys_ni_syscall .long sys_eventfd .long sys_fallocate + .long sys_setns /* 325 */ diff --git a/arch/m68k/include/asm/unistd.h b/arch/m68k/include/asm/unistd.h index f3b649de2a1..43f984e9397 100644 --- a/arch/m68k/include/asm/unistd.h +++ b/arch/m68k/include/asm/unistd.h @@ -349,10 +349,11 @@ #define __NR_open_by_handle_at 341 #define __NR_clock_adjtime 342 #define __NR_syncfs 343 +#define __NR_setns 344 #ifdef __KERNEL__ -#define NR_syscalls 344 +#define NR_syscalls 345 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/m68k/kernel/syscalltable.S b/arch/m68k/kernel/syscalltable.S index 6f7b09122a0..00d1452f957 100644 --- a/arch/m68k/kernel/syscalltable.S +++ b/arch/m68k/kernel/syscalltable.S @@ -364,4 +364,5 @@ ENTRY(sys_call_table) .long sys_open_by_handle_at .long sys_clock_adjtime .long sys_syncfs + .long sys_setns diff --git a/arch/microblaze/include/asm/unistd.h b/arch/microblaze/include/asm/unistd.h index 30edd61a6b8..7d7092b917a 100644 --- a/arch/microblaze/include/asm/unistd.h +++ b/arch/microblaze/include/asm/unistd.h @@ -390,8 +390,9 @@ #define __NR_open_by_handle_at 372 #define __NR_clock_adjtime 373 #define __NR_syncfs 374 +#define __NR_setns 375 -#define __NR_syscalls 375 +#define 
__NR_syscalls 376 #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/arch/microblaze/kernel/syscall_table.S b/arch/microblaze/kernel/syscall_table.S index 85cea81d1ca..d915a122c86 100644 --- a/arch/microblaze/kernel/syscall_table.S +++ b/arch/microblaze/kernel/syscall_table.S @@ -379,3 +379,4 @@ ENTRY(sys_call_table) .long sys_open_by_handle_at .long sys_clock_adjtime .long sys_syncfs + .long sys_setns /* 375 */ diff --git a/arch/mips/include/asm/unistd.h b/arch/mips/include/asm/unistd.h index fa2e37ea2be..6fcfc480e9d 100644 --- a/arch/mips/include/asm/unistd.h +++ b/arch/mips/include/asm/unistd.h @@ -363,16 +363,17 @@ #define __NR_open_by_handle_at (__NR_Linux + 340) #define __NR_clock_adjtime (__NR_Linux + 341) #define __NR_syncfs (__NR_Linux + 342) +#define __NR_setns (__NR_Linux + 343) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 342 +#define __NR_Linux_syscalls 343 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 342 +#define __NR_O32_Linux_syscalls 343 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -682,16 +683,17 @@ #define __NR_open_by_handle_at (__NR_Linux + 299) #define __NR_clock_adjtime (__NR_Linux + 300) #define __NR_syncfs (__NR_Linux + 301) +#define __NR_setns (__NR_Linux + 302) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 301 +#define __NR_Linux_syscalls 302 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 301 +#define __NR_64_Linux_syscalls 302 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -1006,16 +1008,17 @@ #define __NR_open_by_handle_at (__NR_Linux + 304) #define __NR_clock_adjtime (__NR_Linux + 305) #define __NR_syncfs (__NR_Linux + 306) +#define __NR_setns (__NR_Linux + 307) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 306 +#define __NR_Linux_syscalls 307 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define 
__NR_N32_Linux_syscalls 306 +#define __NR_N32_Linux_syscalls 307 #ifdef __KERNEL__ diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 7a8e1dd7f6f..99e656e425f 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -589,6 +589,7 @@ einval: li v0, -ENOSYS sys sys_open_by_handle_at 3 /* 4340 */ sys sys_clock_adjtime 2 sys sys_syncfs 1 + sys sys_setns 2 .endm /* We pre-compute the number of _instruction_ bytes needed to diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 2d31c83224f..fb0575f47f3 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -428,4 +428,5 @@ sys_call_table: PTR sys_open_by_handle_at PTR sys_clock_adjtime /* 5300 */ PTR sys_syncfs + PTR sys_setns .size sys_call_table,.-sys_call_table diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 38a0503b9a4..4de0c5534e7 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -428,4 +428,5 @@ EXPORT(sysn32_call_table) PTR sys_open_by_handle_at PTR compat_sys_clock_adjtime /* 6305 */ PTR sys_syncfs + PTR sys_setns .size sysn32_call_table,.-sysn32_call_table diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 91ea5e4041d..4a387de08bf 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -546,4 +546,5 @@ sys_call_table: PTR compat_sys_open_by_handle_at /* 4340 */ PTR compat_sys_clock_adjtime PTR sys_syncfs + PTR sys_setns .size sys_call_table,.-sys_call_table diff --git a/arch/mn10300/include/asm/unistd.h b/arch/mn10300/include/asm/unistd.h index 9d056f51592..9051f921cbc 100644 --- a/arch/mn10300/include/asm/unistd.h +++ b/arch/mn10300/include/asm/unistd.h @@ -349,10 +349,11 @@ #define __NR_rt_tgsigqueueinfo 336 #define __NR_perf_event_open 337 #define __NR_recvmmsg 338 +#define __NR_setns 339 #ifdef __KERNEL__ -#define NR_syscalls 339 +#define NR_syscalls 340 /* * specify the 
deprecated syscalls we want to support on this arch diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index fb93ad720b8..ae435e1d566 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -759,6 +759,7 @@ ENTRY(sys_call_table) .long sys_rt_tgsigqueueinfo .long sys_perf_event_open .long sys_recvmmsg + .long sys_setns nr_syscalls=(.-sys_call_table)/4 diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index 9cbc2c3bf63..3392de3e7be 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -820,8 +820,9 @@ #define __NR_name_to_handle_at (__NR_Linux + 325) #define __NR_open_by_handle_at (__NR_Linux + 326) #define __NR_syncfs (__NR_Linux + 327) +#define __NR_setns (__NR_Linux + 328) -#define __NR_Linux_syscalls (__NR_syncfs + 1) +#define __NR_Linux_syscalls (__NR_setns + 1) #define __IGNORE_select /* newselect */ diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S index a5b02ce4d41..34a4f5a2fff 100644 --- a/arch/parisc/kernel/syscall_table.S +++ b/arch/parisc/kernel/syscall_table.S @@ -426,6 +426,7 @@ ENTRY_SAME(name_to_handle_at) /* 325 */ ENTRY_COMP(open_by_handle_at) ENTRY_SAME(syncfs) + ENTRY_SAME(setns) /* Nothing yet */ diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 8489d372077..f6736b7da46 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -353,3 +353,4 @@ COMPAT_SYS_SPU(open_by_handle_at) COMPAT_SYS_SPU(clock_adjtime) SYSCALL_SPU(syncfs) COMPAT_SYS_SPU(sendmmsg) +SYSCALL_SPU(setns) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 6d23c8193ca..b8b3f599362 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -372,10 +372,11 @@ #define __NR_clock_adjtime 347 #define __NR_syncfs 348 #define __NR_sendmmsg 349 +#define __NR_setns 350 #ifdef __KERNEL__ -#define __NR_syscalls 350 
+#define __NR_syscalls 351 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 9089b042119..7667db448aa 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -715,7 +715,8 @@ static struct syscore_ops pmacpic_syscore_ops = { static int __init init_pmacpic_syscore(void) { - register_syscore_ops(&pmacpic_syscore_ops); + if (pmac_irq_hw[0]) + register_syscore_ops(&pmacpic_syscore_ops); return 0; } diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c4773a2ef3d..e4efacfe1b6 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -577,16 +577,16 @@ static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long pfn, bits; + unsigned long address, bits; unsigned char skey; - pfn = pte_val(*ptep) >> PAGE_SHIFT; - skey = page_get_storage_key(pfn); + address = pte_val(*ptep) & PAGE_MASK; + skey = page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Clear page changed & referenced bit in the storage key */ if (bits) { skey ^= bits; - page_set_storage_key(pfn, skey, 1); + page_set_storage_key(address, skey, 1); } /* Transfer page changed & referenced bit to guest bits in pgste */ pgste_val(pgste) |= bits << 48; /* RCP_GR_BIT & RCP_GC_BIT */ @@ -628,16 +628,16 @@ static inline pgste_t pgste_update_young(pte_t *ptep, pgste_t pgste) static inline void pgste_set_pte(pte_t *ptep, pgste_t pgste) { #ifdef CONFIG_PGSTE - unsigned long pfn; + unsigned long address; unsigned long okey, nkey; - pfn = pte_val(*ptep) >> PAGE_SHIFT; - okey = nkey = page_get_storage_key(pfn); + address = pte_val(*ptep) & PAGE_MASK; + okey = nkey = page_get_storage_key(address); nkey &= ~(_PAGE_ACC_BITS | _PAGE_FP_BIT); /* Set page access key and fetch protection 
bit from pgste */ nkey |= (pgste_val(pgste) & (RCP_ACC_BITS | RCP_FP_BIT)) >> 56; if (okey != nkey) - page_set_storage_key(pfn, nkey, 1); + page_set_storage_key(address, nkey, 1); #endif } diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 9208e69245a..404bdb9671b 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -276,7 +276,8 @@ #define __NR_open_by_handle_at 336 #define __NR_clock_adjtime 337 #define __NR_syncfs 338 -#define NR_syscalls 339 +#define __NR_setns 339 +#define NR_syscalls 340 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index 1dc96ea08fa..1f5eb789c3a 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -1904,3 +1904,9 @@ compat_sys_clock_adjtime_wrapper: sys_syncfs_wrapper: lgfr %r2,%r2 # int jg sys_syncfs + + .globl sys_setns_wrapper +sys_setns_wrapper: + lgfr %r2,%r2 # int + lgfr %r3,%r3 # int + jg sys_setns diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9c65fd4ddce..6ee39ef8fe4 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -347,3 +347,4 @@ SYSCALL(sys_name_to_handle_at,sys_name_to_handle_at,sys_name_to_handle_at_wrappe SYSCALL(sys_open_by_handle_at,sys_open_by_handle_at,compat_sys_open_by_handle_at_wrapper) SYSCALL(sys_clock_adjtime,sys_clock_adjtime,compat_sys_clock_adjtime_wrapper) SYSCALL(sys_syncfs,sys_syncfs,sys_syncfs_wrapper) +SYSCALL(sys_setns,sys_setns,sys_setns_wrapper) diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 71a4b0d34be..51e5cd9b906 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -19,7 +19,7 @@ * using the stura instruction. * Returns the number of bytes copied or -EFAULT. 
*/ -static long probe_kernel_write_odd(void *dst, void *src, size_t size) +static long probe_kernel_write_odd(void *dst, const void *src, size_t size) { unsigned long count, aligned; int offset, mask; @@ -45,7 +45,7 @@ static long probe_kernel_write_odd(void *dst, void *src, size_t size) return rc ? rc : count; } -long probe_kernel_write(void *dst, void *src, size_t size) +long probe_kernel_write(void *dst, const void *src, size_t size) { long copied = 0; diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 14c6fae6fe6..b09763fe5da 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -71,12 +71,15 @@ static void rcu_table_freelist_callback(struct rcu_head *head) void rcu_table_freelist_finish(void) { - struct rcu_table_freelist *batch = __get_cpu_var(rcu_table_freelist); + struct rcu_table_freelist **batchp = &get_cpu_var(rcu_table_freelist); + struct rcu_table_freelist *batch = *batchp; if (!batch) - return; + goto out; call_rcu(&batch->rcu, rcu_table_freelist_callback); - __get_cpu_var(rcu_table_freelist) = NULL; + *batchp = NULL; +out: + put_cpu_var(rcu_table_freelist); } static void smp_sync(void *arg) @@ -141,20 +144,23 @@ void crst_table_free_rcu(struct mm_struct *mm, unsigned long *table) { struct rcu_table_freelist *batch; + preempt_disable(); if (atomic_read(&mm->mm_users) < 2 && cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) { crst_table_free(mm, table); - return; + goto out; } batch = rcu_table_freelist_get(mm); if (!batch) { smp_call_function(smp_sync, NULL, 1); crst_table_free(mm, table); - return; + goto out; } batch->table[--batch->crst_index] = table; if (batch->pgt_index >= batch->crst_index) rcu_table_freelist_finish(); +out: + preempt_enable(); } #ifdef CONFIG_64BIT @@ -323,16 +329,17 @@ void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned long bits; + preempt_disable(); if (atomic_read(&mm->mm_users) < 2 && cpumask_equal(mm_cpumask(mm), 
cpumask_of(smp_processor_id()))) { page_table_free(mm, table); - return; + goto out; } batch = rcu_table_freelist_get(mm); if (!batch) { smp_call_function(smp_sync, NULL, 1); page_table_free(mm, table); - return; + goto out; } bits = (mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); @@ -345,6 +352,8 @@ void page_table_free_rcu(struct mm_struct *mm, unsigned long *table) batch->table[batch->pgt_index++] = table; if (batch->pgt_index >= batch->crst_index) rcu_table_freelist_finish(); +out: + preempt_enable(); } /* diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 74495a5ea02..f03338c2f08 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -161,7 +161,7 @@ config ARCH_HAS_CPU_IDLE_WAIT config NO_IOPORT def_bool !PCI - depends on !SH_CAYMAN && !SH_SH4202_MICRODEV + depends on !SH_CAYMAN && !SH_SH4202_MICRODEV && !SH_SHMIN config IO_TRAPPED bool diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index 618bd566cf5..969421f64a1 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -359,37 +359,31 @@ static struct soc_camera_link camera_link = { .priv = &camera_info, }; -static void dummy_release(struct device *dev) +static struct platform_device *camera_device; + +static void ap325rxa_camera_release(struct device *dev) { + soc_camera_platform_release(&camera_device); } -static struct platform_device camera_device = { - .name = "soc_camera_platform", - .dev = { - .platform_data = &camera_info, - .release = dummy_release, - }, -}; - static int ap325rxa_camera_add(struct soc_camera_link *icl, struct device *dev) { - if (icl != &camera_link || camera_probe() <= 0) - return -ENODEV; + int ret = soc_camera_platform_add(icl, dev, &camera_device, &camera_link, + ap325rxa_camera_release, 0); + if (ret < 0) + return ret; - camera_info.dev = dev; + ret = camera_probe(); + if (ret < 0) + soc_camera_platform_del(icl, camera_device, 
&camera_link); - return platform_device_register(&camera_device); + return ret; } static void ap325rxa_camera_del(struct soc_camera_link *icl) { - if (icl != &camera_link) - return; - - platform_device_unregister(&camera_device); - memset(&camera_device.dev.kobj, 0, - sizeof(camera_device.dev.kobj)); + soc_camera_platform_del(icl, camera_device, &camera_link); } #endif /* CONFIG_I2C */ diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index bb13d0e1b96..3a32741cc0a 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -885,6 +885,9 @@ static struct platform_device sh_mmcif_device = { }, .num_resources = ARRAY_SIZE(sh_mmcif_resources), .resource = sh_mmcif_resources, + .archdata = { + .hwblk_id = HWBLK_MMC, + }, }; #endif diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h index db85916b9e9..9210e93a92c 100644 --- a/arch/sh/include/asm/pgtable.h +++ b/arch/sh/include/asm/pgtable.h @@ -18,6 +18,7 @@ #include <asm/pgtable-2level.h> #endif #include <asm/page.h> +#include <asm/mmu.h> #ifndef __ASSEMBLY__ #include <asm/addrspace.h> diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h index 40725b4a801..88bd6be168a 100644 --- a/arch/sh/include/asm/ptrace.h +++ b/arch/sh/include/asm/ptrace.h @@ -41,7 +41,9 @@ #define user_mode(regs) (((regs)->sr & 0x40000000)==0) #define kernel_stack_pointer(_regs) ((unsigned long)(_regs)->regs[15]) -#define GET_USP(regs) ((regs)->regs[15]) + +#define GET_FP(regs) ((regs)->regs[14]) +#define GET_USP(regs) ((regs)->regs[15]) extern void show_regs(struct pt_regs *); @@ -131,7 +133,7 @@ extern void ptrace_triggered(struct perf_event *bp, int nmi, static inline unsigned long profile_pc(struct pt_regs *regs) { - unsigned long pc = instruction_pointer(regs); + unsigned long pc = regs->pc; if (virt_addr_uncached(pc)) return CAC_ADDR(pc); diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h index 
6c308d8b9a5..ec88bfcdf7c 100644 --- a/arch/sh/include/asm/tlb.h +++ b/arch/sh/include/asm/tlb.h @@ -9,6 +9,7 @@ #include <linux/pagemap.h> #ifdef CONFIG_MMU +#include <linux/swap.h> #include <asm/pgalloc.h> #include <asm/tlbflush.h> #include <asm/mmu_context.h> diff --git a/arch/sh/include/asm/unistd_32.h b/arch/sh/include/asm/unistd_32.h index bb7d2702c2c..3432008d288 100644 --- a/arch/sh/include/asm/unistd_32.h +++ b/arch/sh/include/asm/unistd_32.h @@ -374,8 +374,9 @@ #define __NR_clock_adjtime 361 #define __NR_syncfs 362 #define __NR_sendmmsg 363 +#define __NR_setns 364 -#define NR_syscalls 364 +#define NR_syscalls 365 #ifdef __KERNEL__ diff --git a/arch/sh/include/asm/unistd_64.h b/arch/sh/include/asm/unistd_64.h index 46327cea1e5..ec9898665f2 100644 --- a/arch/sh/include/asm/unistd_64.h +++ b/arch/sh/include/asm/unistd_64.h @@ -395,10 +395,11 @@ #define __NR_clock_adjtime 372 #define __NR_syncfs 373 #define __NR_sendmmsg 374 +#define __NR_setns 375 #ifdef __KERNEL__ -#define NR_syscalls 375 +#define NR_syscalls 376 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/sh/include/cpu-sh4/cpu/sh7722.h b/arch/sh/include/cpu-sh4/cpu/sh7722.h index 7a5b8a331b4..bd0622788d6 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7722.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7722.h @@ -236,6 +236,7 @@ enum { }; enum { + SHDMA_SLAVE_INVALID, SHDMA_SLAVE_SCIF0_TX, SHDMA_SLAVE_SCIF0_RX, SHDMA_SLAVE_SCIF1_TX, diff --git a/arch/sh/include/cpu-sh4/cpu/sh7724.h b/arch/sh/include/cpu-sh4/cpu/sh7724.h index 7eb43599942..3daef8ecbc6 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7724.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7724.h @@ -285,6 +285,7 @@ enum { }; enum { + SHDMA_SLAVE_INVALID, SHDMA_SLAVE_SCIF0_TX, SHDMA_SLAVE_SCIF0_RX, SHDMA_SLAVE_SCIF1_TX, diff --git a/arch/sh/include/cpu-sh4/cpu/sh7757.h b/arch/sh/include/cpu-sh4/cpu/sh7757.h index 05b8196c775..41f9f8b9db7 100644 --- a/arch/sh/include/cpu-sh4/cpu/sh7757.h +++ b/arch/sh/include/cpu-sh4/cpu/sh7757.h @@ 
-252,6 +252,7 @@ enum { }; enum { + SHDMA_SLAVE_INVALID, SHDMA_SLAVE_SDHI_TX, SHDMA_SLAVE_SDHI_RX, SHDMA_SLAVE_MMCIF_TX, diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 762a13984bb..b473f0c06fb 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -21,6 +21,7 @@ #include <linux/fs.h> #include <linux/ftrace.h> #include <linux/hw_breakpoint.h> +#include <linux/prefetch.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> #include <asm/system.h> diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S index 7c486f3e3a3..39b051de4c7 100644 --- a/arch/sh/kernel/syscalls_32.S +++ b/arch/sh/kernel/syscalls_32.S @@ -381,3 +381,4 @@ ENTRY(sys_call_table) .long sys_clock_adjtime .long sys_syncfs .long sys_sendmmsg + .long sys_setns diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S index ba1a737afe8..089c4d825d0 100644 --- a/arch/sh/kernel/syscalls_64.S +++ b/arch/sh/kernel/syscalls_64.S @@ -401,3 +401,4 @@ sys_call_table: .long sys_clock_adjtime .long sys_syncfs .long sys_sendmmsg + .long sys_setns /* 375 */ diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 40733a95240..f251b5f2765 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -82,7 +82,7 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size, void *addr; addr = __in_29bit_mode() ? 
- (void *)P1SEGADDR((unsigned long)vaddr) : vaddr; + (void *)CAC_ADDR((unsigned long)vaddr) : vaddr; switch (direction) { case DMA_FROM_DEVICE: /* invalidate only */ diff --git a/arch/sparc/include/asm/unistd.h b/arch/sparc/include/asm/unistd.h index c5387ed0add..6260d5deeab 100644 --- a/arch/sparc/include/asm/unistd.h +++ b/arch/sparc/include/asm/unistd.h @@ -405,8 +405,9 @@ #define __NR_clock_adjtime 334 #define __NR_syncfs 335 #define __NR_sendmmsg 336 +#define __NR_setns 337 -#define NR_syscalls 337 +#define NR_syscalls 338 #ifdef __32bit_syscall_numbers__ /* Sparc 32-bit only has the "setresuid32", "getresuid32" variants, diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 332c83ff770..6e492d59f6b 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -84,4 +84,4 @@ sys_call_table: /*320*/ .long sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, sys_preadv /*325*/ .long sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init /*330*/ .long sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime -/*335*/ .long sys_syncfs, sys_sendmmsg +/*335*/ .long sys_syncfs, sys_sendmmsg, sys_setns diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 43887ca0be0..f566518483b 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -85,7 +85,7 @@ sys_call_table32: /*320*/ .word sys_dup3, sys_pipe2, sys_inotify_init1, sys_accept4, compat_sys_preadv .word compat_sys_pwritev, compat_sys_rt_tgsigqueueinfo, sys_perf_event_open, compat_sys_recvmmsg, sys_fanotify_init /*330*/ .word sys32_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, compat_sys_open_by_handle_at, compat_sys_clock_adjtime - .word sys_syncfs, compat_sys_sendmmsg + .word sys_syncfs, compat_sys_sendmmsg, sys_setns #endif /* CONFIG_COMPAT */ @@ -162,4 +162,4 @@ sys_call_table: /*320*/ .word sys_dup3, sys_pipe2, 
sys_inotify_init1, sys_accept4, sys_preadv .word sys_pwritev, sys_rt_tgsigqueueinfo, sys_perf_event_open, sys_recvmmsg, sys_fanotify_init /*330*/ .word sys_fanotify_mark, sys_prlimit64, sys_name_to_handle_at, sys_open_by_handle_at, sys_clock_adjtime - .word sys_syncfs, sys_sendmmsg + .word sys_syncfs, sys_sendmmsg, sys_setns diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index e1e50101b3b..0249b8b4db5 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -11,6 +11,7 @@ config TILE select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP select GENERIC_IRQ_SHOW + select SYS_HYPERVISOR # FIXME: investigate whether we need/want these options. # select HAVE_IOREMAP_PROT diff --git a/arch/tile/include/asm/hardwall.h b/arch/tile/include/asm/hardwall.h index 0bed3ec7b42..2ac422848c7 100644 --- a/arch/tile/include/asm/hardwall.h +++ b/arch/tile/include/asm/hardwall.h @@ -40,6 +40,10 @@ #define HARDWALL_DEACTIVATE \ _IO(HARDWALL_IOCTL_BASE, _HARDWALL_DEACTIVATE) +#define _HARDWALL_GET_ID 4 +#define HARDWALL_GET_ID \ + _IO(HARDWALL_IOCTL_BASE, _HARDWALL_GET_ID) + #ifndef __KERNEL__ /* This is the canonical name expected by userspace. */ @@ -47,9 +51,14 @@ #else -/* Hook for /proc/tile/hardwall. */ -struct seq_file; -int proc_tile_hardwall_show(struct seq_file *sf, void *v); +/* /proc hooks for hardwall. 
*/ +struct proc_dir_entry; +#ifdef CONFIG_HARDWALL +void proc_tile_hardwall_init(struct proc_dir_entry *root); +int proc_pid_hardwall(struct task_struct *task, char *buffer); +#else +static inline void proc_tile_hardwall_init(struct proc_dir_entry *root) {} +#endif #endif diff --git a/arch/tile/kernel/Makefile b/arch/tile/kernel/Makefile index b4c8e8ec45d..b4dbc057baa 100644 --- a/arch/tile/kernel/Makefile +++ b/arch/tile/kernel/Makefile @@ -5,7 +5,7 @@ extra-y := vmlinux.lds head_$(BITS).o obj-y := backtrace.o entry.o init_task.o irq.o messaging.o \ pci-dma.o proc.o process.o ptrace.o reboot.o \ - setup.o signal.o single_step.o stack.o sys.o time.o traps.o \ + setup.o signal.o single_step.o stack.o sys.o sysfs.o time.o traps.o \ intvec_$(BITS).o regs_$(BITS).o tile-desc_$(BITS).o obj-$(CONFIG_HARDWALL) += hardwall.o diff --git a/arch/tile/kernel/hardwall.c b/arch/tile/kernel/hardwall.c index 3bddef710de..8c41891aab3 100644 --- a/arch/tile/kernel/hardwall.c +++ b/arch/tile/kernel/hardwall.c @@ -40,16 +40,25 @@ struct hardwall_info { struct list_head list; /* "rectangles" list */ struct list_head task_head; /* head of tasks in this hardwall */ + struct cpumask cpumask; /* cpus in the rectangle */ int ulhc_x; /* upper left hand corner x coord */ int ulhc_y; /* upper left hand corner y coord */ int width; /* rectangle width */ int height; /* rectangle height */ + int id; /* integer id for this hardwall */ int teardown_in_progress; /* are we tearing this one down? */ }; /* Currently allocated hardwall rectangles */ static LIST_HEAD(rectangles); +/* /proc/tile/hardwall */ +static struct proc_dir_entry *hardwall_proc_dir; + +/* Functions to manage files in /proc/tile/hardwall. */ +static void hardwall_add_proc(struct hardwall_info *rect); +static void hardwall_remove_proc(struct hardwall_info *rect); + /* * Guard changes to the hardwall data structures. * This could be finer grained (e.g. 
one lock for the list of hardwall @@ -105,6 +114,8 @@ static int setup_rectangle(struct hardwall_info *r, struct cpumask *mask) r->ulhc_y = cpu_y(ulhc); r->width = cpu_x(lrhc) - r->ulhc_x + 1; r->height = cpu_y(lrhc) - r->ulhc_y + 1; + cpumask_copy(&r->cpumask, mask); + r->id = ulhc; /* The ulhc cpu id can be the hardwall id. */ /* Width and height must be positive */ if (r->width <= 0 || r->height <= 0) @@ -388,6 +399,9 @@ static struct hardwall_info *hardwall_create( /* Set up appropriate hardwalling on all affected cpus. */ hardwall_setup(rect); + /* Create a /proc/tile/hardwall entry. */ + hardwall_add_proc(rect); + return rect; } @@ -645,6 +659,9 @@ static void hardwall_destroy(struct hardwall_info *rect) /* Restart switch and disable firewall. */ on_each_cpu_mask(&mask, restart_udn_switch, NULL, 1); + /* Remove the /proc/tile/hardwall entry. */ + hardwall_remove_proc(rect); + /* Now free the rectangle from the list. */ spin_lock_irqsave(&hardwall_lock, flags); BUG_ON(!list_empty(&rect->task_head)); @@ -654,35 +671,57 @@ static void hardwall_destroy(struct hardwall_info *rect) } -/* - * Dump hardwall state via /proc; initialized in arch/tile/sys/proc.c. 
- */ -int proc_tile_hardwall_show(struct seq_file *sf, void *v) +static int hardwall_proc_show(struct seq_file *sf, void *v) { - struct hardwall_info *r; + struct hardwall_info *rect = sf->private; + char buf[256]; - if (udn_disabled) { - seq_printf(sf, "%dx%d 0,0 pids:\n", smp_width, smp_height); - return 0; - } - - spin_lock_irq(&hardwall_lock); - list_for_each_entry(r, &rectangles, list) { - struct task_struct *p; - seq_printf(sf, "%dx%d %d,%d pids:", - r->width, r->height, r->ulhc_x, r->ulhc_y); - list_for_each_entry(p, &r->task_head, thread.hardwall_list) { - unsigned int cpu = cpumask_first(&p->cpus_allowed); - unsigned int x = cpu % smp_width; - unsigned int y = cpu / smp_width; - seq_printf(sf, " %d@%d,%d", p->pid, x, y); - } - seq_printf(sf, "\n"); - } - spin_unlock_irq(&hardwall_lock); + int rc = cpulist_scnprintf(buf, sizeof(buf), &rect->cpumask); + buf[rc++] = '\n'; + seq_write(sf, buf, rc); return 0; } +static int hardwall_proc_open(struct inode *inode, + struct file *file) +{ + return single_open(file, hardwall_proc_show, PDE(inode)->data); +} + +static const struct file_operations hardwall_proc_fops = { + .open = hardwall_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void hardwall_add_proc(struct hardwall_info *rect) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%d", rect->id); + proc_create_data(buf, 0444, hardwall_proc_dir, + &hardwall_proc_fops, rect); +} + +static void hardwall_remove_proc(struct hardwall_info *rect) +{ + char buf[64]; + snprintf(buf, sizeof(buf), "%d", rect->id); + remove_proc_entry(buf, hardwall_proc_dir); +} + +int proc_pid_hardwall(struct task_struct *task, char *buffer) +{ + struct hardwall_info *rect = task->thread.hardwall; + return rect ? sprintf(buffer, "%d\n", rect->id) : 0; +} + +void proc_tile_hardwall_init(struct proc_dir_entry *root) +{ + if (!udn_disabled) + hardwall_proc_dir = proc_mkdir("hardwall", root); +} + /* * Character device support via ioctl/close. 
@@ -716,6 +755,9 @@ static long hardwall_ioctl(struct file *file, unsigned int a, unsigned long b) return -EINVAL; return hardwall_deactivate(current); + case _HARDWALL_GET_ID: + return rect ? rect->id : -EINVAL; + default: return -EINVAL; } diff --git a/arch/tile/kernel/proc.c b/arch/tile/kernel/proc.c index 2e02c41ddf3..62d820833c6 100644 --- a/arch/tile/kernel/proc.c +++ b/arch/tile/kernel/proc.c @@ -27,6 +27,7 @@ #include <asm/processor.h> #include <asm/sections.h> #include <asm/homecache.h> +#include <asm/hardwall.h> #include <arch/chip.h> @@ -88,3 +89,75 @@ const struct seq_operations cpuinfo_op = { .stop = c_stop, .show = show_cpuinfo, }; + +/* + * Support /proc/tile directory + */ + +static int __init proc_tile_init(void) +{ + struct proc_dir_entry *root = proc_mkdir("tile", NULL); + if (root == NULL) + return 0; + + proc_tile_hardwall_init(root); + + return 0; +} + +arch_initcall(proc_tile_init); + +/* + * Support /proc/sys/tile directory + */ + +#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */ +static ctl_table unaligned_subtable[] = { + { + .procname = "enabled", + .data = &unaligned_fixup, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "printk", + .data = &unaligned_printk, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { + .procname = "count", + .data = &unaligned_fixup_count, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + {} +}; + +static ctl_table unaligned_table[] = { + { + .procname = "unaligned_fixup", + .mode = 0555, + .child = unaligned_subtable + }, + {} +}; +#endif + +static struct ctl_path tile_path[] = { + { .procname = "tile" }, + { } +}; + +static int __init proc_sys_tile_init(void) +{ +#ifndef __tilegx__ /* FIXME: GX: no support for unaligned access yet */ + register_sysctl_paths(tile_path, unaligned_table); +#endif + return 0; +} + +arch_initcall(proc_sys_tile_init); diff --git 
a/arch/tile/kernel/sysfs.c b/arch/tile/kernel/sysfs.c new file mode 100644 index 00000000000..b671a86f451 --- /dev/null +++ b/arch/tile/kernel/sysfs.c @@ -0,0 +1,185 @@ +/* + * Copyright 2011 Tilera Corporation. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for + * more details. + * + * /sys entry support. + */ + +#include <linux/sysdev.h> +#include <linux/cpu.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <hv/hypervisor.h> + +/* Return a string queried from the hypervisor, truncated to page size. */ +static ssize_t get_hv_confstr(char *page, int query) +{ + ssize_t n = hv_confstr(query, (unsigned long)page, PAGE_SIZE - 1); + n = n < 0 ? 
0 : min(n, (ssize_t)PAGE_SIZE - 1) - 1; + if (n) + page[n++] = '\n'; + page[n] = '\0'; + return n; +} + +static ssize_t chip_width_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return sprintf(page, "%u\n", smp_width); +} +static SYSDEV_CLASS_ATTR(chip_width, 0444, chip_width_show, NULL); + +static ssize_t chip_height_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return sprintf(page, "%u\n", smp_height); +} +static SYSDEV_CLASS_ATTR(chip_height, 0444, chip_height_show, NULL); + +static ssize_t chip_serial_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return get_hv_confstr(page, HV_CONFSTR_CHIP_SERIAL_NUM); +} +static SYSDEV_CLASS_ATTR(chip_serial, 0444, chip_serial_show, NULL); + +static ssize_t chip_revision_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return get_hv_confstr(page, HV_CONFSTR_CHIP_REV); +} +static SYSDEV_CLASS_ATTR(chip_revision, 0444, chip_revision_show, NULL); + + +static ssize_t type_show(struct sysdev_class *dev, + struct sysdev_class_attribute *attr, + char *page) +{ + return sprintf(page, "tilera\n"); +} +static SYSDEV_CLASS_ATTR(type, 0444, type_show, NULL); + +#define HV_CONF_ATTR(name, conf) \ + static ssize_t name ## _show(struct sysdev_class *dev, \ + struct sysdev_class_attribute *attr, \ + char *page) \ + { \ + return get_hv_confstr(page, conf); \ + } \ + static SYSDEV_CLASS_ATTR(name, 0444, name ## _show, NULL); + +HV_CONF_ATTR(version, HV_CONFSTR_HV_SW_VER) +HV_CONF_ATTR(config_version, HV_CONFSTR_HV_CONFIG_VER) + +HV_CONF_ATTR(board_part, HV_CONFSTR_BOARD_PART_NUM) +HV_CONF_ATTR(board_serial, HV_CONFSTR_BOARD_SERIAL_NUM) +HV_CONF_ATTR(board_revision, HV_CONFSTR_BOARD_REV) +HV_CONF_ATTR(board_description, HV_CONFSTR_BOARD_DESC) +HV_CONF_ATTR(mezz_part, HV_CONFSTR_MEZZ_PART_NUM) +HV_CONF_ATTR(mezz_serial, HV_CONFSTR_MEZZ_SERIAL_NUM) +HV_CONF_ATTR(mezz_revision, 
HV_CONFSTR_MEZZ_REV) +HV_CONF_ATTR(mezz_description, HV_CONFSTR_MEZZ_DESC) +HV_CONF_ATTR(switch_control, HV_CONFSTR_SWITCH_CONTROL) + +static struct attribute *board_attrs[] = { + &attr_board_part.attr, + &attr_board_serial.attr, + &attr_board_revision.attr, + &attr_board_description.attr, + &attr_mezz_part.attr, + &attr_mezz_serial.attr, + &attr_mezz_revision.attr, + &attr_mezz_description.attr, + &attr_switch_control.attr, + NULL +}; + +static struct attribute_group board_attr_group = { + .name = "board", + .attrs = board_attrs, +}; + + +static struct bin_attribute hvconfig_bin; + +static ssize_t +hvconfig_bin_read(struct file *filp, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buf, loff_t off, size_t count) +{ + static size_t size; + + /* Lazily learn the true size (minus the trailing NUL). */ + if (size == 0) + size = hv_confstr(HV_CONFSTR_HV_CONFIG, 0, 0) - 1; + + /* Check and adjust input parameters. */ + if (off > size) + return -EINVAL; + if (count > size - off) + count = size - off; + + if (count) { + /* Get a copy of the hvc and copy out the relevant portion. 
*/ + char *hvc; + + size = off + count; + hvc = kmalloc(size, GFP_KERNEL); + if (hvc == NULL) + return -ENOMEM; + hv_confstr(HV_CONFSTR_HV_CONFIG, (unsigned long)hvc, size); + memcpy(buf, hvc + off, count); + kfree(hvc); + } + + return count; +} + +static int __init create_sysfs_entries(void) +{ + struct sysdev_class *cls = &cpu_sysdev_class; + int err = 0; + +#define create_cpu_attr(name) \ + if (!err) \ + err = sysfs_create_file(&cls->kset.kobj, &attr_##name.attr); + create_cpu_attr(chip_width); + create_cpu_attr(chip_height); + create_cpu_attr(chip_serial); + create_cpu_attr(chip_revision); + +#define create_hv_attr(name) \ + if (!err) \ + err = sysfs_create_file(hypervisor_kobj, &attr_##name.attr); + create_hv_attr(type); + create_hv_attr(version); + create_hv_attr(config_version); + + if (!err) + err = sysfs_create_group(hypervisor_kobj, &board_attr_group); + + if (!err) { + sysfs_bin_attr_init(&hvconfig_bin); + hvconfig_bin.attr.name = "hvconfig"; + hvconfig_bin.attr.mode = S_IRUGO; + hvconfig_bin.read = hvconfig_bin_read; + hvconfig_bin.size = PAGE_SIZE; + err = sysfs_create_bin_file(hypervisor_kobj, &hvconfig_bin); + } + + return err; +} +subsys_initcall(create_sysfs_entries); diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 95f5826be45..c1870dddd32 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -849,4 +849,5 @@ ia32_sys_call_table: .quad compat_sys_clock_adjtime .quad sys_syncfs .quad compat_sys_sendmmsg /* 345 */ + .quad sys_setns ia32_syscall_end: diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 416d865eae3..610001d385d 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -139,7 +139,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) boot_cpu_data.x86_model <= 0x05 && boot_cpu_data.x86_mask < 0x0A) return 1; - else if (c1e_detected) + else if (amd_e400_c1e_detected) return 1; else return max_cstate; diff --git 
a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 5dc6acc98db..71cc3800712 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -125,7 +125,7 @@ #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ #define X86_FEATURE_F16C (4*32+29) /* 16-bit fp conversions */ -#define X86_FEATURE_RDRND (4*32+30) /* The RDRAND instruction */ +#define X86_FEATURE_RDRAND (4*32+30) /* The RDRAND instruction */ #define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 617bd56b307..7b439d9aea2 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -4,30 +4,33 @@ #include <asm/desc_defs.h> #include <asm/ldt.h> #include <asm/mmu.h> + #include <linux/smp.h> -static inline void fill_ldt(struct desc_struct *desc, - const struct user_desc *info) -{ - desc->limit0 = info->limit & 0x0ffff; - desc->base0 = info->base_addr & 0x0000ffff; - - desc->base1 = (info->base_addr & 0x00ff0000) >> 16; - desc->type = (info->read_exec_only ^ 1) << 1; - desc->type |= info->contents << 2; - desc->s = 1; - desc->dpl = 0x3; - desc->p = info->seg_not_present ^ 1; - desc->limit = (info->limit & 0xf0000) >> 16; - desc->avl = info->useable; - desc->d = info->seg_32bit; - desc->g = info->limit_in_pages; - desc->base2 = (info->base_addr & 0xff000000) >> 24; +static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info) +{ + desc->limit0 = info->limit & 0x0ffff; + + desc->base0 = (info->base_addr & 0x0000ffff); + desc->base1 = (info->base_addr & 0x00ff0000) >> 16; + + desc->type = (info->read_exec_only ^ 1) << 1; + desc->type |= info->contents << 2; + + desc->s = 1; + desc->dpl = 0x3; + desc->p = info->seg_not_present ^ 1; + desc->limit = (info->limit & 0xf0000) 
>> 16; + desc->avl = info->useable; + desc->d = info->seg_32bit; + desc->g = info->limit_in_pages; + + desc->base2 = (info->base_addr & 0xff000000) >> 24; /* * Don't allow setting of the lm bit. It is useless anyway * because 64bit system calls require __USER_CS: */ - desc->l = 0; + desc->l = 0; } extern struct desc_ptr idt_descr; @@ -36,6 +39,7 @@ extern gate_desc idt_table[]; struct gdt_page { struct desc_struct gdt[GDT_ENTRIES]; } __attribute__((aligned(PAGE_SIZE))); + DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page); static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) @@ -48,16 +52,16 @@ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func, unsigned dpl, unsigned ist, unsigned seg) { - gate->offset_low = PTR_LOW(func); - gate->segment = __KERNEL_CS; - gate->ist = ist; - gate->p = 1; - gate->dpl = dpl; - gate->zero0 = 0; - gate->zero1 = 0; - gate->type = type; - gate->offset_middle = PTR_MIDDLE(func); - gate->offset_high = PTR_HIGH(func); + gate->offset_low = PTR_LOW(func); + gate->segment = __KERNEL_CS; + gate->ist = ist; + gate->p = 1; + gate->dpl = dpl; + gate->zero0 = 0; + gate->zero1 = 0; + gate->type = type; + gate->offset_middle = PTR_MIDDLE(func); + gate->offset_high = PTR_HIGH(func); } #else @@ -66,8 +70,7 @@ static inline void pack_gate(gate_desc *gate, unsigned char type, unsigned short seg) { gate->a = (seg << 16) | (base & 0xffff); - gate->b = (base & 0xffff0000) | - (((0x80 | type | (dpl << 5)) & 0xff) << 8); + gate->b = (base & 0xffff0000) | (((0x80 | type | (dpl << 5)) & 0xff) << 8); } #endif @@ -75,31 +78,29 @@ static inline void pack_gate(gate_desc *gate, unsigned char type, static inline int desc_empty(const void *ptr) { const u32 *desc = ptr; + return !(desc[0] | desc[1]); } #ifdef CONFIG_PARAVIRT #include <asm/paravirt.h> #else -#define load_TR_desc() native_load_tr_desc() -#define load_gdt(dtr) native_load_gdt(dtr) 
-#define load_idt(dtr) native_load_idt(dtr) -#define load_tr(tr) asm volatile("ltr %0"::"m" (tr)) -#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt)) - -#define store_gdt(dtr) native_store_gdt(dtr) -#define store_idt(dtr) native_store_idt(dtr) -#define store_tr(tr) (tr = native_store_tr()) - -#define load_TLS(t, cpu) native_load_tls(t, cpu) -#define set_ldt native_set_ldt - -#define write_ldt_entry(dt, entry, desc) \ - native_write_ldt_entry(dt, entry, desc) -#define write_gdt_entry(dt, entry, desc, type) \ - native_write_gdt_entry(dt, entry, desc, type) -#define write_idt_entry(dt, entry, g) \ - native_write_idt_entry(dt, entry, g) +#define load_TR_desc() native_load_tr_desc() +#define load_gdt(dtr) native_load_gdt(dtr) +#define load_idt(dtr) native_load_idt(dtr) +#define load_tr(tr) asm volatile("ltr %0"::"m" (tr)) +#define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt)) + +#define store_gdt(dtr) native_store_gdt(dtr) +#define store_idt(dtr) native_store_idt(dtr) +#define store_tr(tr) (tr = native_store_tr()) + +#define load_TLS(t, cpu) native_load_tls(t, cpu) +#define set_ldt native_set_ldt + +#define write_ldt_entry(dt, entry, desc) native_write_ldt_entry(dt, entry, desc) +#define write_gdt_entry(dt, entry, desc, type) native_write_gdt_entry(dt, entry, desc, type) +#define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g) static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries) { @@ -112,33 +113,27 @@ static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries) #define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) -static inline void native_write_idt_entry(gate_desc *idt, int entry, - const gate_desc *gate) +static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate) { memcpy(&idt[entry], gate, sizeof(*gate)); } -static inline void native_write_ldt_entry(struct desc_struct *ldt, int entry, - const void *desc) +static inline void native_write_ldt_entry(struct 
desc_struct *ldt, int entry, const void *desc) { memcpy(&ldt[entry], desc, 8); } -static inline void native_write_gdt_entry(struct desc_struct *gdt, int entry, - const void *desc, int type) +static inline void +native_write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc, int type) { unsigned int size; + switch (type) { - case DESC_TSS: - size = sizeof(tss_desc); - break; - case DESC_LDT: - size = sizeof(ldt_desc); - break; - default: - size = sizeof(struct desc_struct); - break; + case DESC_TSS: size = sizeof(tss_desc); break; + case DESC_LDT: size = sizeof(ldt_desc); break; + default: size = sizeof(*gdt); break; } + memcpy(&gdt[entry], desc, size); } @@ -154,20 +149,21 @@ static inline void pack_descriptor(struct desc_struct *desc, unsigned long base, } -static inline void set_tssldt_descriptor(void *d, unsigned long addr, - unsigned type, unsigned size) +static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size) { #ifdef CONFIG_X86_64 struct ldttss_desc64 *desc = d; + memset(desc, 0, sizeof(*desc)); - desc->limit0 = size & 0xFFFF; - desc->base0 = PTR_LOW(addr); - desc->base1 = PTR_MIDDLE(addr) & 0xFF; - desc->type = type; - desc->p = 1; - desc->limit1 = (size >> 16) & 0xF; - desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF; - desc->base3 = PTR_HIGH(addr); + + desc->limit0 = size & 0xFFFF; + desc->base0 = PTR_LOW(addr); + desc->base1 = PTR_MIDDLE(addr) & 0xFF; + desc->type = type; + desc->p = 1; + desc->limit1 = (size >> 16) & 0xF; + desc->base2 = (PTR_MIDDLE(addr) >> 8) & 0xFF; + desc->base3 = PTR_HIGH(addr); #else pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0); #endif @@ -237,14 +233,16 @@ static inline void native_store_idt(struct desc_ptr *dtr) static inline unsigned long native_store_tr(void) { unsigned long tr; + asm volatile("str %0":"=r" (tr)); + return tr; } static inline void native_load_tls(struct thread_struct *t, unsigned int cpu) { - unsigned int i; struct desc_struct *gdt = 
get_cpu_gdt_table(cpu); + unsigned int i; for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]; @@ -313,6 +311,7 @@ static inline void _set_gate(int gate, unsigned type, void *addr, unsigned dpl, unsigned ist, unsigned seg) { gate_desc s; + pack_gate(&s, type, (unsigned long)addr, dpl, ist, seg); /* * does not need to be atomic because it is only done once at @@ -343,8 +342,9 @@ static inline void alloc_system_vector(int vector) set_bit(vector, used_vectors); if (first_system_vector > vector) first_system_vector = vector; - } else + } else { BUG(); + } } static inline void alloc_intr_gate(unsigned int n, void *addr) diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h index 38d87379e27..f49253d7571 100644 --- a/arch/x86/include/asm/idle.h +++ b/arch/x86/include/asm/idle.h @@ -16,6 +16,6 @@ static inline void enter_idle(void) { } static inline void exit_idle(void) { } #endif /* CONFIG_X86_64 */ -void c1e_remove_cpu(int cpu); +void amd_e400_remove_cpu(int cpu); #endif /* _ASM_X86_IDLE_H */ diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index aeff3e89b22..5f55e696276 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -11,14 +11,14 @@ typedef struct { void *ldt; int size; - struct mutex lock; - void *vdso; #ifdef CONFIG_X86_64 /* True if mm supports a task running in 32 bit compatibility mode. 
*/ unsigned short ia32_compat; #endif + struct mutex lock; + void *vdso; } mm_context_t; #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 4c25ab48257..219371546af 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -754,10 +754,10 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx) extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); extern void select_idle_routine(const struct cpuinfo_x86 *c); -extern void init_c1e_mask(void); +extern void init_amd_e400_c1e_mask(void); extern unsigned long boot_option_idle_override; -extern bool c1e_detected; +extern bool amd_e400_c1e_detected; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, IDLE_POLL, IDLE_FORCE_MWAIT}; diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index fb6a625c99b..593485b38ab 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -351,10 +351,11 @@ #define __NR_clock_adjtime 343 #define __NR_syncfs 344 #define __NR_sendmmsg 345 +#define __NR_setns 346 #ifdef __KERNEL__ -#define NR_syscalls 346 +#define NR_syscalls 347 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 79f90eb15aa..705bf139288 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -679,6 +679,8 @@ __SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) __SYSCALL(__NR_syncfs, sys_syncfs) #define __NR_sendmmsg 307 __SYSCALL(__NR_sendmmsg, sys_sendmmsg) +#define __NR_setns 308 +__SYSCALL(__NR_setns, sys_setns) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 130f1eeee5f..a291c40efd4 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h @@ -5,7 +5,7 @@ * * SGI UV Broadcast Assist 
Unit definitions * - * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2008-2011 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_X86_UV_UV_BAU_H @@ -35,17 +35,20 @@ #define MAX_CPUS_PER_UVHUB 64 #define MAX_CPUS_PER_SOCKET 32 -#define UV_ADP_SIZE 64 /* hardware-provided max. */ -#define UV_CPUS_PER_ACT_STATUS 32 /* hardware-provided max. */ -#define UV_ITEMS_PER_DESCRIPTOR 8 +#define ADP_SZ 64 /* hardware-provided max. */ +#define UV_CPUS_PER_AS 32 /* hardware-provided max. */ +#define ITEMS_PER_DESC 8 /* the 'throttle' to prevent the hardware stay-busy bug */ #define MAX_BAU_CONCURRENT 3 #define UV_ACT_STATUS_MASK 0x3 #define UV_ACT_STATUS_SIZE 2 #define UV_DISTRIBUTION_SIZE 256 #define UV_SW_ACK_NPENDING 8 -#define UV_NET_ENDPOINT_INTD 0x38 -#define UV_DESC_BASE_PNODE_SHIFT 49 +#define UV1_NET_ENDPOINT_INTD 0x38 +#define UV2_NET_ENDPOINT_INTD 0x28 +#define UV_NET_ENDPOINT_INTD (is_uv1_hub() ? \ + UV1_NET_ENDPOINT_INTD : UV2_NET_ENDPOINT_INTD) +#define UV_DESC_PSHIFT 49 #define UV_PAYLOADQ_PNODE_SHIFT 49 #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" #define UV_BAU_BASENAME "sgi_uv/bau_tunables" @@ -53,29 +56,64 @@ #define UV_BAU_TUNABLES_FILE "bau_tunables" #define WHITESPACE " \t\n" #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) -#define UV_ENABLE_INTD_SOFT_ACK_MODE_SHIFT 15 -#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHIFT 16 -#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD 0x0000000009UL +#define cpubit_isset(cpu, bau_local_cpumask) \ + test_bit((cpu), (bau_local_cpumask).bits) + /* [19:16] SOFT_ACK timeout period 19: 1 is urgency 7 17:16 1 is multiplier */ -#define BAU_MISC_CONTROL_MULT_MASK 3 +/* + * UV2: Bit 19 selects between + * (0): 10 microsecond timebase and + * (1): 80 microseconds + * we're using 655us, similar to UV1: 65 units of 10us + */ +#define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL) +#define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (65*10UL) + +#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD (is_uv1_hub() ? 
\ + UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD : \ + UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD) -#define UVH_AGING_PRESCALE_SEL 0x000000b000UL +#define BAU_MISC_CONTROL_MULT_MASK 3 + +#define UVH_AGING_PRESCALE_SEL 0x000000b000UL /* [30:28] URGENCY_7 an index into a table of times */ -#define BAU_URGENCY_7_SHIFT 28 -#define BAU_URGENCY_7_MASK 7 +#define BAU_URGENCY_7_SHIFT 28 +#define BAU_URGENCY_7_MASK 7 -#define UVH_TRANSACTION_TIMEOUT 0x000000b200UL +#define UVH_TRANSACTION_TIMEOUT 0x000000b200UL /* [45:40] BAU - BAU transaction timeout select - a multiplier */ -#define BAU_TRANS_SHIFT 40 -#define BAU_TRANS_MASK 0x3f +#define BAU_TRANS_SHIFT 40 +#define BAU_TRANS_MASK 0x3f + +/* + * shorten some awkward names + */ +#define AS_PUSH_SHIFT UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT +#define SOFTACK_MSHIFT UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT +#define SOFTACK_PSHIFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT +#define SOFTACK_TIMEOUT_PERIOD UV_INTD_SOFT_ACK_TIMEOUT_PERIOD +#define write_gmmr uv_write_global_mmr64 +#define write_lmmr uv_write_local_mmr +#define read_lmmr uv_read_local_mmr +#define read_gmmr uv_read_global_mmr64 /* * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 */ -#define DESC_STATUS_IDLE 0 -#define DESC_STATUS_ACTIVE 1 -#define DESC_STATUS_DESTINATION_TIMEOUT 2 -#define DESC_STATUS_SOURCE_TIMEOUT 3 +#define DS_IDLE 0 +#define DS_ACTIVE 1 +#define DS_DESTINATION_TIMEOUT 2 +#define DS_SOURCE_TIMEOUT 3 +/* + * bits put together from HRP_LB_BAU_SB_ACTIVATION_STATUS_0/1/2 + * values 1 and 5 will not occur + */ +#define UV2H_DESC_IDLE 0 +#define UV2H_DESC_DEST_TIMEOUT 2 +#define UV2H_DESC_DEST_STRONG_NACK 3 +#define UV2H_DESC_BUSY 4 +#define UV2H_DESC_SOURCE_TIMEOUT 6 +#define UV2H_DESC_DEST_PUT_ERR 7 /* * delay for 'plugged' timeout retries, in microseconds @@ -86,15 +124,24 @@ * threshholds at which to use IPI to free resources */ /* after this # consecutive 'plugged' timeouts, use IPI to release resources */ -#define PLUGSB4RESET 100 
+#define PLUGSB4RESET 100 /* after this many consecutive timeouts, use IPI to release resources */ -#define TIMEOUTSB4RESET 1 +#define TIMEOUTSB4RESET 1 /* at this number uses of IPI to release resources, giveup the request */ -#define IPI_RESET_LIMIT 1 +#define IPI_RESET_LIMIT 1 /* after this # consecutive successes, bump up the throttle if it was lowered */ -#define COMPLETE_THRESHOLD 5 +#define COMPLETE_THRESHOLD 5 + +#define UV_LB_SUBNODEID 0x10 -#define UV_LB_SUBNODEID 0x10 +/* these two are the same for UV1 and UV2: */ +#define UV_SA_SHFT UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT +#define UV_SA_MASK UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK +/* 4 bits of software ack period */ +#define UV2_ACK_MASK 0x7UL +#define UV2_ACK_UNITS_SHFT 3 +#define UV2_LEG_SHFT UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT +#define UV2_EXT_SHFT UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT /* * number of entries in the destination side payload queue @@ -115,9 +162,16 @@ /* * tuning the action when the numalink network is extremely delayed */ -#define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in microseconds */ -#define CONGESTED_REPS 10 /* long delays averaged over this many broadcasts */ -#define CONGESTED_PERIOD 30 /* time for the bau to be disabled, in seconds */ +#define CONGESTED_RESPONSE_US 1000 /* 'long' response time, in + microseconds */ +#define CONGESTED_REPS 10 /* long delays averaged over + this many broadcasts */ +#define CONGESTED_PERIOD 30 /* time for the bau to be + disabled, in seconds */ +/* see msg_type: */ +#define MSG_NOOP 0 +#define MSG_REGULAR 1 +#define MSG_RETRY 2 /* * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) @@ -129,8 +183,8 @@ * 'base_dest_nasid' field of the header corresponds to the * destination nodeID associated with that specified bit. 
*/ -struct bau_target_uvhubmask { - unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; +struct bau_targ_hubmask { + unsigned long bits[BITS_TO_LONGS(UV_DISTRIBUTION_SIZE)]; }; /* @@ -139,7 +193,7 @@ struct bau_target_uvhubmask { * enough bits for max. cpu's per uvhub) */ struct bau_local_cpumask { - unsigned long bits; + unsigned long bits; }; /* @@ -160,14 +214,14 @@ struct bau_local_cpumask { * The payload is software-defined for INTD transactions */ struct bau_msg_payload { - unsigned long address; /* signifies a page or all TLB's - of the cpu */ + unsigned long address; /* signifies a page or all + TLB's of the cpu */ /* 64 bits */ - unsigned short sending_cpu; /* filled in by sender */ + unsigned short sending_cpu; /* filled in by sender */ /* 16 bits */ - unsigned short acknowledge_count;/* filled in by destination */ + unsigned short acknowledge_count; /* filled in by destination */ /* 16 bits */ - unsigned int reserved1:32; /* not usable */ + unsigned int reserved1:32; /* not usable */ }; @@ -176,93 +230,96 @@ struct bau_msg_payload { * see table 4.2.3.0.1 in broacast_assist spec. 
*/ struct bau_msg_header { - unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ + unsigned int dest_subnodeid:6; /* must be 0x10, for the LB */ /* bits 5:0 */ - unsigned int base_dest_nasid:15; /* nasid of the */ - /* bits 20:6 */ /* first bit in uvhub map */ - unsigned int command:8; /* message type */ + unsigned int base_dest_nasid:15; /* nasid of the first bit */ + /* bits 20:6 */ /* in uvhub map */ + unsigned int command:8; /* message type */ /* bits 28:21 */ - /* 0x38: SN3net EndPoint Message */ - unsigned int rsvd_1:3; /* must be zero */ + /* 0x38: SN3net EndPoint Message */ + unsigned int rsvd_1:3; /* must be zero */ /* bits 31:29 */ - /* int will align on 32 bits */ - unsigned int rsvd_2:9; /* must be zero */ + /* int will align on 32 bits */ + unsigned int rsvd_2:9; /* must be zero */ /* bits 40:32 */ - /* Suppl_A is 56-41 */ - unsigned int sequence:16;/* message sequence number */ - /* bits 56:41 */ /* becomes bytes 16-17 of msg */ - /* Address field (96:57) is never used as an - address (these are address bits 42:3) */ - - unsigned int rsvd_3:1; /* must be zero */ + /* Suppl_A is 56-41 */ + unsigned int sequence:16; /* message sequence number */ + /* bits 56:41 */ /* becomes bytes 16-17 of msg */ + /* Address field (96:57) is + never used as an address + (these are address bits + 42:3) */ + + unsigned int rsvd_3:1; /* must be zero */ /* bit 57 */ - /* address bits 27:4 are payload */ + /* address bits 27:4 are payload */ /* these next 24 (58-81) bits become bytes 12-14 of msg */ - /* bits 65:58 land in byte 12 */ - unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ + unsigned int replied_to:1; /* sent as 0 by the source to + byte 12 */ /* bit 58 */ - unsigned int msg_type:3; /* software type of the message*/ + unsigned int msg_type:3; /* software type of the + message */ /* bits 61:59 */ - unsigned int canceled:1; /* message canceled, resource to be freed*/ + unsigned int canceled:1; /* message canceled, resource + is to be 
freed*/ /* bit 62 */ - unsigned int payload_1a:1;/* not currently used */ + unsigned int payload_1a:1; /* not currently used */ /* bit 63 */ - unsigned int payload_1b:2;/* not currently used */ + unsigned int payload_1b:2; /* not currently used */ /* bits 65:64 */ /* bits 73:66 land in byte 13 */ - unsigned int payload_1ca:6;/* not currently used */ + unsigned int payload_1ca:6; /* not currently used */ /* bits 71:66 */ - unsigned int payload_1c:2;/* not currently used */ + unsigned int payload_1c:2; /* not currently used */ /* bits 73:72 */ /* bits 81:74 land in byte 14 */ - unsigned int payload_1d:6;/* not currently used */ + unsigned int payload_1d:6; /* not currently used */ /* bits 79:74 */ - unsigned int payload_1e:2;/* not currently used */ + unsigned int payload_1e:2; /* not currently used */ /* bits 81:80 */ - unsigned int rsvd_4:7; /* must be zero */ + unsigned int rsvd_4:7; /* must be zero */ /* bits 88:82 */ - unsigned int sw_ack_flag:1;/* software acknowledge flag */ + unsigned int swack_flag:1; /* software acknowledge flag */ /* bit 89 */ - /* INTD trasactions at destination are to - wait for software acknowledge */ - unsigned int rsvd_5:6; /* must be zero */ + /* INTD trasactions at + destination are to wait for + software acknowledge */ + unsigned int rsvd_5:6; /* must be zero */ /* bits 95:90 */ - unsigned int rsvd_6:5; /* must be zero */ + unsigned int rsvd_6:5; /* must be zero */ /* bits 100:96 */ - unsigned int int_both:1;/* if 1, interrupt both sockets on the uvhub */ + unsigned int int_both:1; /* if 1, interrupt both sockets + on the uvhub */ /* bit 101*/ - unsigned int fairness:3;/* usually zero */ + unsigned int fairness:3; /* usually zero */ /* bits 104:102 */ - unsigned int multilevel:1; /* multi-level multicast format */ + unsigned int multilevel:1; /* multi-level multicast + format */ /* bit 105 */ - /* 0 for TLB: endpoint multi-unicast messages */ - unsigned int chaining:1;/* next descriptor is part of this activation*/ + /* 0 for TLB: 
endpoint multi-unicast messages */ + unsigned int chaining:1; /* next descriptor is part of + this activation*/ /* bit 106 */ - unsigned int rsvd_7:21; /* must be zero */ + unsigned int rsvd_7:21; /* must be zero */ /* bits 127:107 */ }; -/* see msg_type: */ -#define MSG_NOOP 0 -#define MSG_REGULAR 1 -#define MSG_RETRY 2 - /* * The activation descriptor: * The format of the message to send, plus all accompanying control * Should be 64 bytes */ struct bau_desc { - struct bau_target_uvhubmask distribution; + struct bau_targ_hubmask distribution; /* * message template, consisting of header and payload: */ - struct bau_msg_header header; - struct bau_msg_payload payload; + struct bau_msg_header header; + struct bau_msg_payload payload; }; /* * -payload-- ---------header------ @@ -281,59 +338,51 @@ struct bau_desc { * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17 * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120) * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from - * sw_ack_vector and payload_2) + * swack_vec and payload_2) * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload * operation." 
*/ -struct bau_payload_queue_entry { - unsigned long address; /* signifies a page or all TLB's - of the cpu */ +struct bau_pq_entry { + unsigned long address; /* signifies a page or all TLB's + of the cpu */ /* 64 bits, bytes 0-7 */ - - unsigned short sending_cpu; /* cpu that sent the message */ + unsigned short sending_cpu; /* cpu that sent the message */ /* 16 bits, bytes 8-9 */ - - unsigned short acknowledge_count; /* filled in by destination */ + unsigned short acknowledge_count; /* filled in by destination */ /* 16 bits, bytes 10-11 */ - /* these next 3 bytes come from bits 58-81 of the message header */ - unsigned short replied_to:1; /* sent as 0 by the source */ - unsigned short msg_type:3; /* software message type */ - unsigned short canceled:1; /* sent as 0 by the source */ - unsigned short unused1:3; /* not currently using */ + unsigned short replied_to:1; /* sent as 0 by the source */ + unsigned short msg_type:3; /* software message type */ + unsigned short canceled:1; /* sent as 0 by the source */ + unsigned short unused1:3; /* not currently using */ /* byte 12 */ - - unsigned char unused2a; /* not currently using */ + unsigned char unused2a; /* not currently using */ /* byte 13 */ - unsigned char unused2; /* not currently using */ + unsigned char unused2; /* not currently using */ /* byte 14 */ - - unsigned char sw_ack_vector; /* filled in by the hardware */ + unsigned char swack_vec; /* filled in by the hardware */ /* byte 15 (bits 127:120) */ - - unsigned short sequence; /* message sequence number */ + unsigned short sequence; /* message sequence number */ /* bytes 16-17 */ - unsigned char unused4[2]; /* not currently using bytes 18-19 */ + unsigned char unused4[2]; /* not currently using bytes 18-19 */ /* bytes 18-19 */ - - int number_of_cpus; /* filled in at destination */ + int number_of_cpus; /* filled in at destination */ /* 32 bits, bytes 20-23 (aligned) */ - - unsigned char unused5[8]; /* not using */ + unsigned char unused5[8]; /* not using 
*/ /* bytes 24-31 */ }; struct msg_desc { - struct bau_payload_queue_entry *msg; - int msg_slot; - int sw_ack_slot; - struct bau_payload_queue_entry *va_queue_first; - struct bau_payload_queue_entry *va_queue_last; + struct bau_pq_entry *msg; + int msg_slot; + int swack_slot; + struct bau_pq_entry *queue_first; + struct bau_pq_entry *queue_last; }; struct reset_args { - int sender; + int sender; }; /* @@ -341,112 +390,226 @@ struct reset_args { */ struct ptc_stats { /* sender statistics */ - unsigned long s_giveup; /* number of fall backs to IPI-style flushes */ - unsigned long s_requestor; /* number of shootdown requests */ - unsigned long s_stimeout; /* source side timeouts */ - unsigned long s_dtimeout; /* destination side timeouts */ - unsigned long s_time; /* time spent in sending side */ - unsigned long s_retriesok; /* successful retries */ - unsigned long s_ntargcpu; /* total number of cpu's targeted */ - unsigned long s_ntargself; /* times the sending cpu was targeted */ - unsigned long s_ntarglocals; /* targets of cpus on the local blade */ - unsigned long s_ntargremotes; /* targets of cpus on remote blades */ - unsigned long s_ntarglocaluvhub; /* targets of the local hub */ - unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */ - unsigned long s_ntarguvhub; /* total number of uvhubs targeted */ - unsigned long s_ntarguvhub16; /* number of times target hubs >= 16*/ - unsigned long s_ntarguvhub8; /* number of times target hubs >= 8 */ - unsigned long s_ntarguvhub4; /* number of times target hubs >= 4 */ - unsigned long s_ntarguvhub2; /* number of times target hubs >= 2 */ - unsigned long s_ntarguvhub1; /* number of times target hubs == 1 */ - unsigned long s_resets_plug; /* ipi-style resets from plug state */ - unsigned long s_resets_timeout; /* ipi-style resets from timeouts */ - unsigned long s_busy; /* status stayed busy past s/w timer */ - unsigned long s_throttles; /* waits in throttle */ - unsigned long s_retry_messages; /* retry broadcasts 
*/ - unsigned long s_bau_reenabled; /* for bau enable/disable */ - unsigned long s_bau_disabled; /* for bau enable/disable */ + unsigned long s_giveup; /* number of fall backs to + IPI-style flushes */ + unsigned long s_requestor; /* number of shootdown + requests */ + unsigned long s_stimeout; /* source side timeouts */ + unsigned long s_dtimeout; /* destination side timeouts */ + unsigned long s_time; /* time spent in sending side */ + unsigned long s_retriesok; /* successful retries */ + unsigned long s_ntargcpu; /* total number of cpu's + targeted */ + unsigned long s_ntargself; /* times the sending cpu was + targeted */ + unsigned long s_ntarglocals; /* targets of cpus on the local + blade */ + unsigned long s_ntargremotes; /* targets of cpus on remote + blades */ + unsigned long s_ntarglocaluvhub; /* targets of the local hub */ + unsigned long s_ntargremoteuvhub; /* remotes hubs targeted */ + unsigned long s_ntarguvhub; /* total number of uvhubs + targeted */ + unsigned long s_ntarguvhub16; /* number of times target + hubs >= 16*/ + unsigned long s_ntarguvhub8; /* number of times target + hubs >= 8 */ + unsigned long s_ntarguvhub4; /* number of times target + hubs >= 4 */ + unsigned long s_ntarguvhub2; /* number of times target + hubs >= 2 */ + unsigned long s_ntarguvhub1; /* number of times target + hubs == 1 */ + unsigned long s_resets_plug; /* ipi-style resets from plug + state */ + unsigned long s_resets_timeout; /* ipi-style resets from + timeouts */ + unsigned long s_busy; /* status stayed busy past + s/w timer */ + unsigned long s_throttles; /* waits in throttle */ + unsigned long s_retry_messages; /* retry broadcasts */ + unsigned long s_bau_reenabled; /* for bau enable/disable */ + unsigned long s_bau_disabled; /* for bau enable/disable */ /* destination statistics */ - unsigned long d_alltlb; /* times all tlb's on this cpu were flushed */ - unsigned long d_onetlb; /* times just one tlb on this cpu was flushed */ - unsigned long d_multmsg; /* 
interrupts with multiple messages */ - unsigned long d_nomsg; /* interrupts with no message */ - unsigned long d_time; /* time spent on destination side */ - unsigned long d_requestee; /* number of messages processed */ - unsigned long d_retries; /* number of retry messages processed */ - unsigned long d_canceled; /* number of messages canceled by retries */ - unsigned long d_nocanceled; /* retries that found nothing to cancel */ - unsigned long d_resets; /* number of ipi-style requests processed */ - unsigned long d_rcanceled; /* number of messages canceled by resets */ + unsigned long d_alltlb; /* times all tlb's on this + cpu were flushed */ + unsigned long d_onetlb; /* times just one tlb on this + cpu was flushed */ + unsigned long d_multmsg; /* interrupts with multiple + messages */ + unsigned long d_nomsg; /* interrupts with no message */ + unsigned long d_time; /* time spent on destination + side */ + unsigned long d_requestee; /* number of messages + processed */ + unsigned long d_retries; /* number of retry messages + processed */ + unsigned long d_canceled; /* number of messages canceled + by retries */ + unsigned long d_nocanceled; /* retries that found nothing + to cancel */ + unsigned long d_resets; /* number of ipi-style requests + processed */ + unsigned long d_rcanceled; /* number of messages canceled + by resets */ +}; + +struct tunables { + int *tunp; + int deflt; }; struct hub_and_pnode { - short uvhub; - short pnode; + short uvhub; + short pnode; }; + +struct socket_desc { + short num_cpus; + short cpu_number[MAX_CPUS_PER_SOCKET]; +}; + +struct uvhub_desc { + unsigned short socket_mask; + short num_cpus; + short uvhub; + short pnode; + struct socket_desc socket[2]; +}; + /* * one per-cpu; to locate the software tables */ struct bau_control { - struct bau_desc *descriptor_base; - struct bau_payload_queue_entry *va_queue_first; - struct bau_payload_queue_entry *va_queue_last; - struct bau_payload_queue_entry *bau_msg_head; - struct bau_control 
*uvhub_master; - struct bau_control *socket_master; - struct ptc_stats *statp; - unsigned long timeout_interval; - unsigned long set_bau_on_time; - atomic_t active_descriptor_count; - int plugged_tries; - int timeout_tries; - int ipi_attempts; - int conseccompletes; - int baudisabled; - int set_bau_off; - short cpu; - short osnode; - short uvhub_cpu; - short uvhub; - short cpus_in_socket; - short cpus_in_uvhub; - short partition_base_pnode; - unsigned short message_number; - unsigned short uvhub_quiesce; - short socket_acknowledge_count[DEST_Q_SIZE]; - cycles_t send_message; - spinlock_t uvhub_lock; - spinlock_t queue_lock; + struct bau_desc *descriptor_base; + struct bau_pq_entry *queue_first; + struct bau_pq_entry *queue_last; + struct bau_pq_entry *bau_msg_head; + struct bau_control *uvhub_master; + struct bau_control *socket_master; + struct ptc_stats *statp; + unsigned long timeout_interval; + unsigned long set_bau_on_time; + atomic_t active_descriptor_count; + int plugged_tries; + int timeout_tries; + int ipi_attempts; + int conseccompletes; + int baudisabled; + int set_bau_off; + short cpu; + short osnode; + short uvhub_cpu; + short uvhub; + short cpus_in_socket; + short cpus_in_uvhub; + short partition_base_pnode; + unsigned short message_number; + unsigned short uvhub_quiesce; + short socket_acknowledge_count[DEST_Q_SIZE]; + cycles_t send_message; + spinlock_t uvhub_lock; + spinlock_t queue_lock; /* tunables */ - int max_bau_concurrent; - int max_bau_concurrent_constant; - int plugged_delay; - int plugsb4reset; - int timeoutsb4reset; - int ipi_reset_limit; - int complete_threshold; - int congested_response_us; - int congested_reps; - int congested_period; - cycles_t period_time; - long period_requests; - struct hub_and_pnode *target_hub_and_pnode; + int max_concurr; + int max_concurr_const; + int plugged_delay; + int plugsb4reset; + int timeoutsb4reset; + int ipi_reset_limit; + int complete_threshold; + int cong_response_us; + int cong_reps; + int 
cong_period; + cycles_t period_time; + long period_requests; + struct hub_and_pnode *thp; }; -static inline int bau_uvhub_isset(int uvhub, struct bau_target_uvhubmask *dstp) +static unsigned long read_mmr_uv2_status(void) +{ + return read_lmmr(UV2H_LB_BAU_SB_ACTIVATION_STATUS_2); +} + +static void write_mmr_data_broadcast(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_BAU_DATA_BROADCAST, mmr_image); +} + +static void write_mmr_descriptor_base(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, mmr_image); +} + +static void write_mmr_activation(unsigned long index) +{ + write_lmmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); +} + +static void write_gmmr_activation(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL, mmr_image); +} + +static void write_mmr_payload_first(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, mmr_image); +} + +static void write_mmr_payload_tail(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, mmr_image); +} + +static void write_mmr_payload_last(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, mmr_image); +} + +static void write_mmr_misc_control(int pnode, unsigned long mmr_image) +{ + write_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); +} + +static unsigned long read_mmr_misc_control(int pnode) +{ + return read_gmmr(pnode, UVH_LB_BAU_MISC_CONTROL); +} + +static void write_mmr_sw_ack(unsigned long mr) +{ + uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, mr); +} + +static unsigned long read_mmr_sw_ack(void) +{ + return read_lmmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); +} + +static unsigned long read_gmmr_sw_ack(int pnode) +{ + return read_gmmr(pnode, UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); +} + +static void write_mmr_data_config(int pnode, unsigned long mr) +{ + uv_write_global_mmr64(pnode, 
UVH_BAU_DATA_CONFIG, mr); +} + +static inline int bau_uvhub_isset(int uvhub, struct bau_targ_hubmask *dstp) { return constant_test_bit(uvhub, &dstp->bits[0]); } -static inline void bau_uvhub_set(int pnode, struct bau_target_uvhubmask *dstp) +static inline void bau_uvhub_set(int pnode, struct bau_targ_hubmask *dstp) { __set_bit(pnode, &dstp->bits[0]); } -static inline void bau_uvhubs_clear(struct bau_target_uvhubmask *dstp, +static inline void bau_uvhubs_clear(struct bau_targ_hubmask *dstp, int nbits) { bitmap_zero(&dstp->bits[0], nbits); } -static inline int bau_uvhub_weight(struct bau_target_uvhubmask *dstp) +static inline int bau_uvhub_weight(struct bau_targ_hubmask *dstp) { return bitmap_weight((unsigned long *)&dstp->bits[0], UV_DISTRIBUTION_SIZE); @@ -457,9 +620,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) bitmap_zero(&dstp->bits, nbits); } -#define cpubit_isset(cpu, bau_local_cpumask) \ - test_bit((cpu), (bau_local_cpumask).bits) - extern void uv_bau_message_intr1(void); extern void uv_bau_timeout_intr1(void); @@ -467,7 +627,7 @@ struct atomic_short { short counter; }; -/** +/* * atomic_read_short - read a short atomic variable * @v: pointer of type atomic_short * @@ -478,14 +638,14 @@ static inline int atomic_read_short(const struct atomic_short *v) return v->counter; } -/** - * atomic_add_short_return - add and return a short int +/* + * atom_asr - add and return a short int * @i: short value to add * @v: pointer of type atomic_short * * Atomically adds @i to @v and returns @i + @v */ -static inline int atomic_add_short_return(short i, struct atomic_short *v) +static inline int atom_asr(short i, struct atomic_short *v) { short __i = i; asm volatile(LOCK_PREFIX "xaddw %0, %1" @@ -494,4 +654,26 @@ static inline int atomic_add_short_return(short i, struct atomic_short *v) return i + __i; } +/* + * conditionally add 1 to *v, unless *v is >= u + * return 0 if we cannot add 1 to *v because it is >= u + * return 1 if we can 
add 1 to *v because it is < u + * the add is atomic + * + * This is close to atomic_add_unless(), but this allows the 'u' value + * to be lowered below the current 'v'. atomic_add_unless can only stop + * on equal. + */ +static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) +{ + spin_lock(lock); + if (atomic_read(v) >= u) { + spin_unlock(lock); + return 0; + } + atomic_inc(v); + spin_unlock(lock); + return 1; +} + #endif /* _ASM_X86_UV_UV_BAU_H */ diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 4298002d0c8..f26544a1521 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -77,8 +77,9 @@ * * 1111110000000000 * 5432109876543210 - * pppppppppplc0cch Nehalem-EX - * ppppppppplcc0cch Westmere-EX + * pppppppppplc0cch Nehalem-EX (12 bits in hdw reg) + * ppppppppplcc0cch Westmere-EX (12 bits in hdw reg) + * pppppppppppcccch SandyBridge (15 bits in hdw reg) * sssssssssss * * p = pnode bits @@ -87,7 +88,7 @@ * h = hyperthread * s = bits that are in the SOCKET_ID CSR * - * Note: Processor only supports 12 bits in the APICID register. The ACPI + * Note: Processor may support fewer bits in the APICID register. The ACPI * tables hold all 16 bits. Software needs to be aware of this. 
* * Unless otherwise specified, all references to APICID refer to @@ -138,6 +139,8 @@ struct uv_hub_info_s { unsigned long global_mmr_base; unsigned long gpa_mask; unsigned int gnode_extra; + unsigned char hub_revision; + unsigned char apic_pnode_shift; unsigned long gnode_upper; unsigned long lowmem_remap_top; unsigned long lowmem_remap_base; @@ -149,13 +152,31 @@ struct uv_hub_info_s { unsigned char m_val; unsigned char n_val; struct uv_scir_s scir; - unsigned char apic_pnode_shift; }; DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); #define uv_hub_info (&__get_cpu_var(__uv_hub_info)) #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) +/* + * Hub revisions less than UV2_HUB_REVISION_BASE are UV1 hubs. All UV2 + * hubs have revision numbers greater than or equal to UV2_HUB_REVISION_BASE. + * This is a software convention - NOT the hardware revision numbers in + * the hub chip. + */ +#define UV1_HUB_REVISION_BASE 1 +#define UV2_HUB_REVISION_BASE 3 + +static inline int is_uv1_hub(void) +{ + return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE; +} + +static inline int is_uv2_hub(void) +{ + return uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE; +} + union uvh_apicid { unsigned long v; struct uvh_apicid_s { @@ -180,11 +201,25 @@ union uvh_apicid { #define UV_PNODE_TO_GNODE(p) ((p) |uv_hub_info->gnode_extra) #define UV_PNODE_TO_NASID(p) (UV_PNODE_TO_GNODE(p) << 1) -#define UV_LOCAL_MMR_BASE 0xf4000000UL -#define UV_GLOBAL_MMR32_BASE 0xf8000000UL +#define UV1_LOCAL_MMR_BASE 0xf4000000UL +#define UV1_GLOBAL_MMR32_BASE 0xf8000000UL +#define UV1_LOCAL_MMR_SIZE (64UL * 1024 * 1024) +#define UV1_GLOBAL_MMR32_SIZE (64UL * 1024 * 1024) + +#define UV2_LOCAL_MMR_BASE 0xfa000000UL +#define UV2_GLOBAL_MMR32_BASE 0xfc000000UL +#define UV2_LOCAL_MMR_SIZE (32UL * 1024 * 1024) +#define UV2_GLOBAL_MMR32_SIZE (32UL * 1024 * 1024) + +#define UV_LOCAL_MMR_BASE (is_uv1_hub() ? UV1_LOCAL_MMR_BASE \ + : UV2_LOCAL_MMR_BASE) +#define UV_GLOBAL_MMR32_BASE (is_uv1_hub() ? 
UV1_GLOBAL_MMR32_BASE \ + : UV2_GLOBAL_MMR32_BASE) +#define UV_LOCAL_MMR_SIZE (is_uv1_hub() ? UV1_LOCAL_MMR_SIZE : \ + UV2_LOCAL_MMR_SIZE) +#define UV_GLOBAL_MMR32_SIZE (is_uv1_hub() ? UV1_GLOBAL_MMR32_SIZE :\ + UV2_GLOBAL_MMR32_SIZE) #define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) -#define UV_LOCAL_MMR_SIZE (64UL * 1024 * 1024) -#define UV_GLOBAL_MMR32_SIZE (64UL * 1024 * 1024) #define UV_GLOBAL_GRU_MMR_BASE 0x4000000 @@ -301,6 +336,17 @@ static inline int uv_apicid_to_pnode(int apicid) } /* + * Convert an apicid to the socket number on the blade + */ +static inline int uv_apicid_to_socket(int apicid) +{ + if (is_uv1_hub()) + return (apicid >> (uv_hub_info->apic_pnode_shift - 1)) & 1; + else + return 0; +} + +/* * Access global MMRs using the low memory MMR32 space. This region supports * faster MMR access but not all MMRs are accessible in this space. */ @@ -519,14 +565,13 @@ static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) /* * Get the minimum revision number of the hub chips within the partition. - * 1 - initial rev 1.0 silicon - * 2 - rev 2.0 production silicon + * 1 - UV1 rev 1.0 initial silicon + * 2 - UV1 rev 2.0 production silicon + * 3 - UV2 rev 1.0 initial silicon */ static inline int uv_get_min_hub_revision_id(void) { - extern int uv_min_hub_revision_id; - - return uv_min_hub_revision_id; + return uv_hub_info->hub_revision; } #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index f5bb64a823d..4be52c86344 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -11,13 +11,64 @@ #ifndef _ASM_X86_UV_UV_MMRS_H #define _ASM_X86_UV_UV_MMRS_H +/* + * This file contains MMR definitions for both UV1 & UV2 hubs. + * + * In general, MMR addresses and structures are identical on both hubs. 
+ * These MMRs are identified as: + * #define UVH_xxx <address> + * union uvh_xxx { + * unsigned long v; + * struct uvh_int_cmpd_s { + * } s; + * }; + * + * If the MMR exists on both hub type but has different addresses or + * contents, the MMR definition is similar to: + * #define UV1H_xxx <uv1 address> + * #define UV2H_xxx <uv2address> + * #define UVH_xxx (is_uv1_hub() ? UV1H_xxx : UV2H_xxx) + * union uvh_xxx { + * unsigned long v; + * struct uv1h_int_cmpd_s { (Common fields only) + * } s; + * struct uv1h_int_cmpd_s { (Full UV1 definition) + * } s1; + * struct uv2h_int_cmpd_s { (Full UV2 definition) + * } s2; + * }; + * + * Only essential difference are enumerated. For example, if the address is + * the same for both UV1 & UV2, only a single #define is generated. Likewise, + * if the contents is the same for both hubs, only the "s" structure is + * generated. + * + * If the MMR exists on ONLY 1 type of hub, no generic definition is + * generated: + * #define UVnH_xxx <uvn address> + * union uvnh_xxx { + * unsigned long v; + * struct uvh_int_cmpd_s { + * } sn; + * }; + */ + #define UV_MMR_ENABLE (1UL << 63) +#define UV1_HUB_PART_NUMBER 0x88a5 +#define UV2_HUB_PART_NUMBER 0x8eb8 + +/* Compat: if this #define is present, UV headers support UV2 */ +#define UV2_HUB_IS_SUPPORTED 1 + +/* KABI compat: if this #define is present, KABI hacks are present */ +#define UV2_HUB_KABI_HACKS 1 + /* ========================================================================= */ /* UVH_BAU_DATA_BROADCAST */ /* ========================================================================= */ #define UVH_BAU_DATA_BROADCAST 0x61688UL -#define UVH_BAU_DATA_BROADCAST_32 0x0440 +#define UVH_BAU_DATA_BROADCAST_32 0x440 #define UVH_BAU_DATA_BROADCAST_ENABLE_SHFT 0 #define UVH_BAU_DATA_BROADCAST_ENABLE_MASK 0x0000000000000001UL @@ -34,7 +85,7 @@ union uvh_bau_data_broadcast_u { /* UVH_BAU_DATA_CONFIG */ /* ========================================================================= */ #define 
UVH_BAU_DATA_CONFIG 0x61680UL -#define UVH_BAU_DATA_CONFIG_32 0x0438 +#define UVH_BAU_DATA_CONFIG_32 0x438 #define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 #define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL @@ -73,125 +124,245 @@ union uvh_bau_data_config_u { /* UVH_EVENT_OCCURRED0 */ /* ========================================================================= */ #define UVH_EVENT_OCCURRED0 0x70000UL -#define UVH_EVENT_OCCURRED0_32 0x005e8 - -#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 -#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL -#define UVH_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 -#define UVH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL -#define UVH_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 -#define UVH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL -#define UVH_EVENT_OCCURRED0_LH_HCERR_SHFT 3 -#define UVH_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL -#define UVH_EVENT_OCCURRED0_RH_HCERR_SHFT 4 -#define UVH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL -#define UVH_EVENT_OCCURRED0_XN_HCERR_SHFT 5 -#define UVH_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL -#define UVH_EVENT_OCCURRED0_SI_HCERR_SHFT 6 -#define UVH_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL -#define UVH_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 -#define UVH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL -#define UVH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 -#define UVH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL -#define UVH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 -#define UVH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL -#define UVH_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 -#define UVH_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL -#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 -#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL -#define UVH_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 -#define UVH_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL -#define UVH_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 -#define UVH_EVENT_OCCURRED0_SI_AOERR0_MASK 
0x0000000000002000UL -#define UVH_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 -#define UVH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL -#define UVH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 -#define UVH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL -#define UVH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 -#define UVH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL -#define UVH_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 -#define UVH_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL -#define UVH_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 -#define UVH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL -#define UVH_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 -#define UVH_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL -#define UVH_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 -#define UVH_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL -#define UVH_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 -#define UVH_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL -#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 -#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL 
-#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 -#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL -#define UVH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 -#define UVH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL -#define UVH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 -#define UVH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL -#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 -#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL -#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 -#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL -#define UVH_EVENT_OCCURRED0_LTC_INT_SHFT 43 -#define UVH_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL -#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 -#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL -#define UVH_EVENT_OCCURRED0_IPI_INT_SHFT 45 -#define UVH_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL -#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 -#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL -#define UVH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 -#define UVH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL -#define 
UVH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 -#define UVH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL -#define UVH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 -#define UVH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL -#define UVH_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 -#define UVH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL -#define UVH_EVENT_OCCURRED0_RTC0_SHFT 51 -#define UVH_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL -#define UVH_EVENT_OCCURRED0_RTC1_SHFT 52 -#define UVH_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL -#define UVH_EVENT_OCCURRED0_RTC2_SHFT 53 -#define UVH_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL -#define UVH_EVENT_OCCURRED0_RTC3_SHFT 54 -#define UVH_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL -#define UVH_EVENT_OCCURRED0_BAU_DATA_SHFT 55 -#define UVH_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL -#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 -#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL +#define UVH_EVENT_OCCURRED0_32 0x5e8 + +#define UV1H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 +#define UV1H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL +#define UV1H_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 +#define UV1H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL +#define UV1H_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 +#define UV1H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL +#define UV1H_EVENT_OCCURRED0_LH_HCERR_SHFT 3 +#define UV1H_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL +#define UV1H_EVENT_OCCURRED0_RH_HCERR_SHFT 4 +#define UV1H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL +#define UV1H_EVENT_OCCURRED0_XN_HCERR_SHFT 5 +#define UV1H_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL +#define UV1H_EVENT_OCCURRED0_SI_HCERR_SHFT 6 +#define UV1H_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL +#define UV1H_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 +#define UV1H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL +#define UV1H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 +#define 
UV1H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL +#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 +#define UV1H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL +#define UV1H_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 +#define UV1H_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL +#define UV1H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UV1H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UV1H_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 +#define UV1H_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL +#define UV1H_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 +#define UV1H_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL +#define UV1H_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 +#define UV1H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL +#define UV1H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 +#define UV1H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL +#define UV1H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 +#define UV1H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL +#define UV1H_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 +#define UV1H_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL +#define UV1H_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 +#define UV1H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL +#define UV1H_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 +#define UV1H_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL +#define UV1H_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 +#define UV1H_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL +#define UV1H_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 +#define UV1H_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL +#define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 +#define UV1H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 +#define 
UV1H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 +#define UV1H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL +#define UV1H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 +#define UV1H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL +#define UV1H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 +#define UV1H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL +#define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 +#define UV1H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL +#define 
UV1H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 +#define UV1H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL +#define UV1H_EVENT_OCCURRED0_LTC_INT_SHFT 43 +#define UV1H_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL +#define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 +#define UV1H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL +#define UV1H_EVENT_OCCURRED0_IPI_INT_SHFT 45 +#define UV1H_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL +#define UV1H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 +#define UV1H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL +#define UV1H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 +#define UV1H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL +#define UV1H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 +#define UV1H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL +#define UV1H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 +#define UV1H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL +#define UV1H_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 +#define UV1H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL +#define UV1H_EVENT_OCCURRED0_RTC0_SHFT 51 +#define UV1H_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL +#define UV1H_EVENT_OCCURRED0_RTC1_SHFT 52 +#define UV1H_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL +#define UV1H_EVENT_OCCURRED0_RTC2_SHFT 53 +#define UV1H_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL +#define UV1H_EVENT_OCCURRED0_RTC3_SHFT 54 +#define UV1H_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL +#define UV1H_EVENT_OCCURRED0_BAU_DATA_SHFT 55 +#define UV1H_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL +#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 +#define UV1H_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL + +#define UV2H_EVENT_OCCURRED0_LB_HCERR_SHFT 0 +#define UV2H_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL +#define UV2H_EVENT_OCCURRED0_QP_HCERR_SHFT 1 +#define UV2H_EVENT_OCCURRED0_QP_HCERR_MASK 0x0000000000000002UL +#define UV2H_EVENT_OCCURRED0_RH_HCERR_SHFT 2 +#define 
UV2H_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000004UL +#define UV2H_EVENT_OCCURRED0_LH0_HCERR_SHFT 3 +#define UV2H_EVENT_OCCURRED0_LH0_HCERR_MASK 0x0000000000000008UL +#define UV2H_EVENT_OCCURRED0_LH1_HCERR_SHFT 4 +#define UV2H_EVENT_OCCURRED0_LH1_HCERR_MASK 0x0000000000000010UL +#define UV2H_EVENT_OCCURRED0_GR0_HCERR_SHFT 5 +#define UV2H_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000020UL +#define UV2H_EVENT_OCCURRED0_GR1_HCERR_SHFT 6 +#define UV2H_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000040UL +#define UV2H_EVENT_OCCURRED0_NI0_HCERR_SHFT 7 +#define UV2H_EVENT_OCCURRED0_NI0_HCERR_MASK 0x0000000000000080UL +#define UV2H_EVENT_OCCURRED0_NI1_HCERR_SHFT 8 +#define UV2H_EVENT_OCCURRED0_NI1_HCERR_MASK 0x0000000000000100UL +#define UV2H_EVENT_OCCURRED0_LB_AOERR0_SHFT 9 +#define UV2H_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000200UL +#define UV2H_EVENT_OCCURRED0_QP_AOERR0_SHFT 10 +#define UV2H_EVENT_OCCURRED0_QP_AOERR0_MASK 0x0000000000000400UL +#define UV2H_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UV2H_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_SHFT 12 +#define UV2H_EVENT_OCCURRED0_LH0_AOERR0_MASK 0x0000000000001000UL +#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_SHFT 13 +#define UV2H_EVENT_OCCURRED0_LH1_AOERR0_MASK 0x0000000000002000UL +#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_SHFT 14 +#define UV2H_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000004000UL +#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_SHFT 15 +#define UV2H_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000008000UL +#define UV2H_EVENT_OCCURRED0_XB_AOERR0_SHFT 16 +#define UV2H_EVENT_OCCURRED0_XB_AOERR0_MASK 0x0000000000010000UL +#define UV2H_EVENT_OCCURRED0_RT_AOERR0_SHFT 17 +#define UV2H_EVENT_OCCURRED0_RT_AOERR0_MASK 0x0000000000020000UL +#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_SHFT 18 +#define UV2H_EVENT_OCCURRED0_NI0_AOERR0_MASK 0x0000000000040000UL +#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_SHFT 19 +#define UV2H_EVENT_OCCURRED0_NI1_AOERR0_MASK 0x0000000000080000UL 
+#define UV2H_EVENT_OCCURRED0_LB_AOERR1_SHFT 20 +#define UV2H_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000100000UL +#define UV2H_EVENT_OCCURRED0_QP_AOERR1_SHFT 21 +#define UV2H_EVENT_OCCURRED0_QP_AOERR1_MASK 0x0000000000200000UL +#define UV2H_EVENT_OCCURRED0_RH_AOERR1_SHFT 22 +#define UV2H_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000400000UL +#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_SHFT 23 +#define UV2H_EVENT_OCCURRED0_LH0_AOERR1_MASK 0x0000000000800000UL +#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_SHFT 24 +#define UV2H_EVENT_OCCURRED0_LH1_AOERR1_MASK 0x0000000001000000UL +#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_SHFT 25 +#define UV2H_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000002000000UL +#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_SHFT 26 +#define UV2H_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000004000000UL +#define UV2H_EVENT_OCCURRED0_XB_AOERR1_SHFT 27 +#define UV2H_EVENT_OCCURRED0_XB_AOERR1_MASK 0x0000000008000000UL +#define UV2H_EVENT_OCCURRED0_RT_AOERR1_SHFT 28 +#define UV2H_EVENT_OCCURRED0_RT_AOERR1_MASK 0x0000000010000000UL +#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_SHFT 29 +#define UV2H_EVENT_OCCURRED0_NI0_AOERR1_MASK 0x0000000020000000UL +#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_SHFT 30 +#define UV2H_EVENT_OCCURRED0_NI1_AOERR1_MASK 0x0000000040000000UL +#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 31 +#define UV2H_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000080000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 32 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000100000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 33 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000200000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 34 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000400000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 35 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000800000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 36 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000001000000000UL 
+#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 37 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000002000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 38 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000004000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 39 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000008000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 40 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000010000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 41 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000020000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 42 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000040000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 43 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000080000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 44 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000100000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 45 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000200000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 46 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000400000000000UL +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 47 +#define UV2H_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000800000000000UL +#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_SHFT 48 +#define UV2H_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0001000000000000UL +#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_SHFT 49 +#define UV2H_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0002000000000000UL +#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 50 +#define UV2H_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0004000000000000UL +#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 51 +#define UV2H_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0008000000000000UL +#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 52 +#define UV2H_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0010000000000000UL +#define UV2H_EVENT_OCCURRED0_IPI_INT_SHFT 53 +#define 
UV2H_EVENT_OCCURRED0_IPI_INT_MASK 0x0020000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_SHFT 54 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0040000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_SHFT 55 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0080000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_SHFT 56 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0100000000000000UL +#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_SHFT 57 +#define UV2H_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0200000000000000UL +#define UV2H_EVENT_OCCURRED0_PROFILE_INT_SHFT 58 +#define UV2H_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0400000000000000UL + union uvh_event_occurred0_u { unsigned long v; - struct uvh_event_occurred0_s { + struct uv1h_event_occurred0_s { unsigned long lb_hcerr : 1; /* RW, W1C */ unsigned long gr0_hcerr : 1; /* RW, W1C */ unsigned long gr1_hcerr : 1; /* RW, W1C */ @@ -250,14 +421,76 @@ union uvh_event_occurred0_u { unsigned long bau_data : 1; /* RW, W1C */ unsigned long power_management_req : 1; /* RW, W1C */ unsigned long rsvd_57_63 : 7; /* */ - } s; + } s1; + struct uv2h_event_occurred0_s { + unsigned long lb_hcerr : 1; /* RW */ + unsigned long qp_hcerr : 1; /* RW */ + unsigned long rh_hcerr : 1; /* RW */ + unsigned long lh0_hcerr : 1; /* RW */ + unsigned long lh1_hcerr : 1; /* RW */ + unsigned long gr0_hcerr : 1; /* RW */ + unsigned long gr1_hcerr : 1; /* RW */ + unsigned long ni0_hcerr : 1; /* RW */ + unsigned long ni1_hcerr : 1; /* RW */ + unsigned long lb_aoerr0 : 1; /* RW */ + unsigned long qp_aoerr0 : 1; /* RW */ + unsigned long rh_aoerr0 : 1; /* RW */ + unsigned long lh0_aoerr0 : 1; /* RW */ + unsigned long lh1_aoerr0 : 1; /* RW */ + unsigned long gr0_aoerr0 : 1; /* RW */ + unsigned long gr1_aoerr0 : 1; /* RW */ + unsigned long xb_aoerr0 : 1; /* RW */ + unsigned long rt_aoerr0 : 1; /* RW */ + unsigned long ni0_aoerr0 : 1; /* RW */ + unsigned long ni1_aoerr0 : 1; /* RW */ + unsigned long lb_aoerr1 : 1; /* RW */ + unsigned long qp_aoerr1 : 1; /* RW */ + 
unsigned long rh_aoerr1 : 1; /* RW */ + unsigned long lh0_aoerr1 : 1; /* RW */ + unsigned long lh1_aoerr1 : 1; /* RW */ + unsigned long gr0_aoerr1 : 1; /* RW */ + unsigned long gr1_aoerr1 : 1; /* RW */ + unsigned long xb_aoerr1 : 1; /* RW */ + unsigned long rt_aoerr1 : 1; /* RW */ + unsigned long ni0_aoerr1 : 1; /* RW */ + unsigned long ni1_aoerr1 : 1; /* RW */ + unsigned long system_shutdown_int : 1; /* RW */ + unsigned long lb_irq_int_0 : 1; /* RW */ + unsigned long lb_irq_int_1 : 1; /* RW */ + unsigned long lb_irq_int_2 : 1; /* RW */ + unsigned long lb_irq_int_3 : 1; /* RW */ + unsigned long lb_irq_int_4 : 1; /* RW */ + unsigned long lb_irq_int_5 : 1; /* RW */ + unsigned long lb_irq_int_6 : 1; /* RW */ + unsigned long lb_irq_int_7 : 1; /* RW */ + unsigned long lb_irq_int_8 : 1; /* RW */ + unsigned long lb_irq_int_9 : 1; /* RW */ + unsigned long lb_irq_int_10 : 1; /* RW */ + unsigned long lb_irq_int_11 : 1; /* RW */ + unsigned long lb_irq_int_12 : 1; /* RW */ + unsigned long lb_irq_int_13 : 1; /* RW */ + unsigned long lb_irq_int_14 : 1; /* RW */ + unsigned long lb_irq_int_15 : 1; /* RW */ + unsigned long l1_nmi_int : 1; /* RW */ + unsigned long stop_clock : 1; /* RW */ + unsigned long asic_to_l1 : 1; /* RW */ + unsigned long l1_to_asic : 1; /* RW */ + unsigned long la_seq_trigger : 1; /* RW */ + unsigned long ipi_int : 1; /* RW */ + unsigned long extio_int0 : 1; /* RW */ + unsigned long extio_int1 : 1; /* RW */ + unsigned long extio_int2 : 1; /* RW */ + unsigned long extio_int3 : 1; /* RW */ + unsigned long profile_int : 1; /* RW */ + unsigned long rsvd_59_63 : 5; /* */ + } s2; }; /* ========================================================================= */ /* UVH_EVENT_OCCURRED0_ALIAS */ /* ========================================================================= */ #define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL -#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0 +#define UVH_EVENT_OCCURRED0_ALIAS_32 0x5f0 /* 
========================================================================= */ /* UVH_GR0_TLB_INT0_CONFIG */ @@ -432,8 +665,16 @@ union uvh_int_cmpb_u { /* ========================================================================= */ #define UVH_INT_CMPC 0x22100UL -#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0 -#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL +#define UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT 0 +#define UV2H_INT_CMPC_REAL_TIME_CMPC_SHFT 0 +#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT (is_uv1_hub() ? \ + UV1H_INT_CMPC_REAL_TIME_CMPC_SHFT : \ + UV2H_INT_CMPC_REAL_TIME_CMPC_SHFT) +#define UV1H_INT_CMPC_REAL_TIME_CMPC_MASK 0xffffffffffffffUL +#define UV2H_INT_CMPC_REAL_TIME_CMPC_MASK 0xffffffffffffffUL +#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK (is_uv1_hub() ? \ + UV1H_INT_CMPC_REAL_TIME_CMPC_MASK : \ + UV2H_INT_CMPC_REAL_TIME_CMPC_MASK) union uvh_int_cmpc_u { unsigned long v; @@ -448,8 +689,16 @@ union uvh_int_cmpc_u { /* ========================================================================= */ #define UVH_INT_CMPD 0x22180UL -#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0 -#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL +#define UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT 0 +#define UV2H_INT_CMPD_REAL_TIME_CMPD_SHFT 0 +#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT (is_uv1_hub() ? \ + UV1H_INT_CMPD_REAL_TIME_CMPD_SHFT : \ + UV2H_INT_CMPD_REAL_TIME_CMPD_SHFT) +#define UV1H_INT_CMPD_REAL_TIME_CMPD_MASK 0xffffffffffffffUL +#define UV2H_INT_CMPD_REAL_TIME_CMPD_MASK 0xffffffffffffffUL +#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK (is_uv1_hub() ? 
\ + UV1H_INT_CMPD_REAL_TIME_CMPD_MASK : \ + UV2H_INT_CMPD_REAL_TIME_CMPD_MASK) union uvh_int_cmpd_u { unsigned long v; @@ -463,7 +712,7 @@ union uvh_int_cmpd_u { /* UVH_IPI_INT */ /* ========================================================================= */ #define UVH_IPI_INT 0x60500UL -#define UVH_IPI_INT_32 0x0348 +#define UVH_IPI_INT_32 0x348 #define UVH_IPI_INT_VECTOR_SHFT 0 #define UVH_IPI_INT_VECTOR_MASK 0x00000000000000ffUL @@ -493,7 +742,7 @@ union uvh_ipi_int_u { /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST 0x320050UL -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x009c0 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_32 0x9c0 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST_ADDRESS_MASK 0x000007fffffffff0UL @@ -515,7 +764,7 @@ union uvh_lb_bau_intd_payload_queue_first_u { /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST 0x320060UL -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x009c8 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_32 0x9c8 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST_ADDRESS_MASK 0x000007fffffffff0UL @@ -533,7 +782,7 @@ union uvh_lb_bau_intd_payload_queue_last_u { /* UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL 0x320070UL -#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x009d0 +#define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_32 0x9d0 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_SHFT 4 #define UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL_ADDRESS_MASK 0x000007fffffffff0UL @@ -551,7 +800,7 @@ union uvh_lb_bau_intd_payload_queue_tail_u { /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE */ /* 
========================================================================= */ #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE 0x320080UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0x0a68 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_32 0xa68 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_SHFT 0 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_PENDING_0_MASK 0x0000000000000001UL @@ -585,6 +834,7 @@ union uvh_lb_bau_intd_payload_queue_tail_u { #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_6_MASK 0x0000000000004000UL #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_SHFT 15 #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_TIMEOUT_7_MASK 0x0000000000008000UL + union uvh_lb_bau_intd_software_acknowledge_u { unsigned long v; struct uvh_lb_bau_intd_software_acknowledge_s { @@ -612,13 +862,13 @@ union uvh_lb_bau_intd_software_acknowledge_u { /* UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS */ /* ========================================================================= */ #define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS 0x0000000000320088UL -#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0x0a70 +#define UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS_32 0xa70 /* ========================================================================= */ /* UVH_LB_BAU_MISC_CONTROL */ /* ========================================================================= */ #define UVH_LB_BAU_MISC_CONTROL 0x320170UL -#define UVH_LB_BAU_MISC_CONTROL_32 0x00a10 +#define UVH_LB_BAU_MISC_CONTROL_32 0xa10 #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 #define UVH_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL @@ -628,8 +878,8 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UVH_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 #define UVH_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL -#define UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_SHFT 11 -#define 
UVH_LB_BAU_MISC_CONTROL_CSI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL +#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 +#define UVH_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 #define UVH_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL #define UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 @@ -650,8 +900,86 @@ union uvh_lb_bau_intd_software_acknowledge_u { #define UVH_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 #define UVH_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL -#define UVH_LB_BAU_MISC_CONTROL_FUN_SHFT 48 -#define UVH_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL + +#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 +#define UV1H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL +#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 +#define UV1H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL +#define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 +#define UV1H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL +#define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 +#define UV1H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL +#define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 +#define UV1H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL +#define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 +#define UV1H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL +#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 +#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL +#define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 +#define UV1H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL 
+#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 +#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL +#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 +#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL +#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 +#define UV1H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL +#define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 +#define UV1H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL +#define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 +#define UV1H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL +#define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 +#define UV1H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL +#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 +#define UV1H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL +#define UV1H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 +#define UV1H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL + +#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_SHFT 0 +#define UV2H_LB_BAU_MISC_CONTROL_REJECTION_DELAY_MASK 0x00000000000000ffUL +#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_SHFT 8 +#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_MASK 0x0000000000000100UL +#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_SHFT 9 +#define UV2H_LB_BAU_MISC_CONTROL_FORCE_BROADCAST_MASK 0x0000000000000200UL +#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_SHFT 10 +#define UV2H_LB_BAU_MISC_CONTROL_FORCE_LOCK_NOP_MASK 0x0000000000000400UL +#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_SHFT 11 +#define UV2H_LB_BAU_MISC_CONTROL_QPI_AGENT_PRESENCE_VECTOR_MASK 0x0000000000003800UL +#define UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_SHFT 14 +#define 
UV2H_LB_BAU_MISC_CONTROL_DESCRIPTOR_FETCH_MODE_MASK 0x0000000000004000UL +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT 15 +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_MASK 0x0000000000008000UL +#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT 16 +#define UV2H_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_MASK 0x00000000000f0000UL +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_SHFT 20 +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_DUAL_MAPPING_MODE_MASK 0x0000000000100000UL +#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_SHFT 21 +#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_DECODE_ENABLE_MASK 0x0000000000200000UL +#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_SHFT 22 +#define UV2H_LB_BAU_MISC_CONTROL_VGA_IO_PORT_16_BIT_DECODE_MASK 0x0000000000400000UL +#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_SHFT 23 +#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_DEST_REGISTRATION_MASK 0x0000000000800000UL +#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_SHFT 24 +#define UV2H_LB_BAU_MISC_CONTROL_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000007000000UL +#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_SHFT 27 +#define UV2H_LB_BAU_MISC_CONTROL_USE_INCOMING_PRIORITY_MASK 0x0000000008000000UL +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_SHFT 28 +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_PROGRAMMED_INITIAL_PRIORITY_MASK 0x0000000010000000UL +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_SHFT 29 +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_AUTOMATIC_APIC_MODE_SELECTION_MASK 0x0000000020000000UL +#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_SHFT 30 +#define UV2H_LB_BAU_MISC_CONTROL_APIC_MODE_STATUS_MASK 0x0000000040000000UL +#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_SHFT 31 +#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INTERRUPTS_TO_SELF_MASK 0x0000000080000000UL +#define 
UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_SHFT 32 +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_LOCK_BASED_SYSTEM_FLUSH_MASK 0x0000000100000000UL +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_SHFT 33 +#define UV2H_LB_BAU_MISC_CONTROL_ENABLE_EXTENDED_SB_STATUS_MASK 0x0000000200000000UL +#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_SHFT 34 +#define UV2H_LB_BAU_MISC_CONTROL_SUPPRESS_INT_PRIO_UDT_TO_SELF_MASK 0x0000000400000000UL +#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_SHFT 35 +#define UV2H_LB_BAU_MISC_CONTROL_USE_LEGACY_DESCRIPTOR_FORMATS_MASK 0x0000000800000000UL +#define UV2H_LB_BAU_MISC_CONTROL_FUN_SHFT 48 +#define UV2H_LB_BAU_MISC_CONTROL_FUN_MASK 0xffff000000000000UL union uvh_lb_bau_misc_control_u { unsigned long v; @@ -660,7 +988,25 @@ union uvh_lb_bau_misc_control_u { unsigned long apic_mode : 1; /* RW */ unsigned long force_broadcast : 1; /* RW */ unsigned long force_lock_nop : 1; /* RW */ - unsigned long csi_agent_presence_vector : 3; /* RW */ + unsigned long qpi_agent_presence_vector : 3; /* RW */ + unsigned long descriptor_fetch_mode : 1; /* RW */ + unsigned long enable_intd_soft_ack_mode : 1; /* RW */ + unsigned long intd_soft_ack_timeout_period : 4; /* RW */ + unsigned long enable_dual_mapping_mode : 1; /* RW */ + unsigned long vga_io_port_decode_enable : 1; /* RW */ + unsigned long vga_io_port_16_bit_decode : 1; /* RW */ + unsigned long suppress_dest_registration : 1; /* RW */ + unsigned long programmed_initial_priority : 3; /* RW */ + unsigned long use_incoming_priority : 1; /* RW */ + unsigned long enable_programmed_initial_priority : 1; /* RW */ + unsigned long rsvd_29_63 : 35; + } s; + struct uv1h_lb_bau_misc_control_s { + unsigned long rejection_delay : 8; /* RW */ + unsigned long apic_mode : 1; /* RW */ + unsigned long force_broadcast : 1; /* RW */ + unsigned long force_lock_nop : 1; /* RW */ + unsigned long qpi_agent_presence_vector : 3; /* RW */ unsigned long descriptor_fetch_mode 
: 1; /* RW */ unsigned long enable_intd_soft_ack_mode : 1; /* RW */ unsigned long intd_soft_ack_timeout_period : 4; /* RW */ @@ -673,14 +1019,40 @@ union uvh_lb_bau_misc_control_u { unsigned long enable_programmed_initial_priority : 1; /* RW */ unsigned long rsvd_29_47 : 19; /* */ unsigned long fun : 16; /* RW */ - } s; + } s1; + struct uv2h_lb_bau_misc_control_s { + unsigned long rejection_delay : 8; /* RW */ + unsigned long apic_mode : 1; /* RW */ + unsigned long force_broadcast : 1; /* RW */ + unsigned long force_lock_nop : 1; /* RW */ + unsigned long qpi_agent_presence_vector : 3; /* RW */ + unsigned long descriptor_fetch_mode : 1; /* RW */ + unsigned long enable_intd_soft_ack_mode : 1; /* RW */ + unsigned long intd_soft_ack_timeout_period : 4; /* RW */ + unsigned long enable_dual_mapping_mode : 1; /* RW */ + unsigned long vga_io_port_decode_enable : 1; /* RW */ + unsigned long vga_io_port_16_bit_decode : 1; /* RW */ + unsigned long suppress_dest_registration : 1; /* RW */ + unsigned long programmed_initial_priority : 3; /* RW */ + unsigned long use_incoming_priority : 1; /* RW */ + unsigned long enable_programmed_initial_priority : 1; /* RW */ + unsigned long enable_automatic_apic_mode_selection : 1; /* RW */ + unsigned long apic_mode_status : 1; /* RO */ + unsigned long suppress_interrupts_to_self : 1; /* RW */ + unsigned long enable_lock_based_system_flush : 1; /* RW */ + unsigned long enable_extended_sb_status : 1; /* RW */ + unsigned long suppress_int_prio_udt_to_self : 1; /* RW */ + unsigned long use_legacy_descriptor_formats : 1; /* RW */ + unsigned long rsvd_36_47 : 12; /* */ + unsigned long fun : 16; /* RW */ + } s2; }; /* ========================================================================= */ /* UVH_LB_BAU_SB_ACTIVATION_CONTROL */ /* ========================================================================= */ #define UVH_LB_BAU_SB_ACTIVATION_CONTROL 0x320020UL -#define UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x009a8 +#define 
UVH_LB_BAU_SB_ACTIVATION_CONTROL_32 0x9a8 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_CONTROL_INDEX_MASK 0x000000000000003fUL @@ -703,7 +1075,7 @@ union uvh_lb_bau_sb_activation_control_u { /* UVH_LB_BAU_SB_ACTIVATION_STATUS_0 */ /* ========================================================================= */ #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0 0x320030UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x009b0 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_32 0x9b0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_0_STATUS_MASK 0xffffffffffffffffUL @@ -719,7 +1091,7 @@ union uvh_lb_bau_sb_activation_status_0_u { /* UVH_LB_BAU_SB_ACTIVATION_STATUS_1 */ /* ========================================================================= */ #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1 0x320040UL -#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x009b8 +#define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_32 0x9b8 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_SHFT 0 #define UVH_LB_BAU_SB_ACTIVATION_STATUS_1_STATUS_MASK 0xffffffffffffffffUL @@ -735,7 +1107,7 @@ union uvh_lb_bau_sb_activation_status_1_u { /* UVH_LB_BAU_SB_DESCRIPTOR_BASE */ /* ========================================================================= */ #define UVH_LB_BAU_SB_DESCRIPTOR_BASE 0x320010UL -#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x009a0 +#define UVH_LB_BAU_SB_DESCRIPTOR_BASE_32 0x9a0 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_SHFT 12 #define UVH_LB_BAU_SB_DESCRIPTOR_BASE_PAGE_ADDRESS_MASK 0x000007fffffff000UL @@ -754,23 +1126,6 @@ union uvh_lb_bau_sb_descriptor_base_u { }; /* ========================================================================= */ -/* UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK */ -/* ========================================================================= */ -#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL -#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x009f0 - -#define 
UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 -#define UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL - -union uvh_lb_target_physical_apic_id_mask_u { - unsigned long v; - struct uvh_lb_target_physical_apic_id_mask_s { - unsigned long bit_enables : 32; /* RW */ - unsigned long rsvd_32_63 : 32; /* */ - } s; -}; - -/* ========================================================================= */ /* UVH_NODE_ID */ /* ========================================================================= */ #define UVH_NODE_ID 0x0UL @@ -785,10 +1140,36 @@ union uvh_lb_target_physical_apic_id_mask_u { #define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL #define UVH_NODE_ID_NODE_ID_SHFT 32 #define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL -#define UVH_NODE_ID_NODES_PER_BIT_SHFT 48 -#define UVH_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL -#define UVH_NODE_ID_NI_PORT_SHFT 56 -#define UVH_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL + +#define UV1H_NODE_ID_FORCE1_SHFT 0 +#define UV1H_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UV1H_NODE_ID_MANUFACTURER_SHFT 1 +#define UV1H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UV1H_NODE_ID_PART_NUMBER_SHFT 12 +#define UV1H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UV1H_NODE_ID_REVISION_SHFT 28 +#define UV1H_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UV1H_NODE_ID_NODE_ID_SHFT 32 +#define UV1H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UV1H_NODE_ID_NODES_PER_BIT_SHFT 48 +#define UV1H_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL +#define UV1H_NODE_ID_NI_PORT_SHFT 56 +#define UV1H_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL + +#define UV2H_NODE_ID_FORCE1_SHFT 0 +#define UV2H_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UV2H_NODE_ID_MANUFACTURER_SHFT 1 +#define UV2H_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UV2H_NODE_ID_PART_NUMBER_SHFT 12 +#define UV2H_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UV2H_NODE_ID_REVISION_SHFT 28 +#define 
UV2H_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UV2H_NODE_ID_NODE_ID_SHFT 32 +#define UV2H_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UV2H_NODE_ID_NODES_PER_BIT_SHFT 50 +#define UV2H_NODE_ID_NODES_PER_BIT_MASK 0x01fc000000000000UL +#define UV2H_NODE_ID_NI_PORT_SHFT 57 +#define UV2H_NODE_ID_NI_PORT_MASK 0x3e00000000000000UL union uvh_node_id_u { unsigned long v; @@ -798,12 +1179,31 @@ union uvh_node_id_u { unsigned long part_number : 16; /* RO */ unsigned long revision : 4; /* RO */ unsigned long node_id : 15; /* RW */ + unsigned long rsvd_47_63 : 17; + } s; + struct uv1h_node_id_s { + unsigned long force1 : 1; /* RO */ + unsigned long manufacturer : 11; /* RO */ + unsigned long part_number : 16; /* RO */ + unsigned long revision : 4; /* RO */ + unsigned long node_id : 15; /* RW */ unsigned long rsvd_47 : 1; /* */ unsigned long nodes_per_bit : 7; /* RW */ unsigned long rsvd_55 : 1; /* */ unsigned long ni_port : 4; /* RO */ unsigned long rsvd_60_63 : 4; /* */ - } s; + } s1; + struct uv2h_node_id_s { + unsigned long force1 : 1; /* RO */ + unsigned long manufacturer : 11; /* RO */ + unsigned long part_number : 16; /* RO */ + unsigned long revision : 4; /* RO */ + unsigned long node_id : 15; /* RW */ + unsigned long rsvd_47_49 : 3; /* */ + unsigned long nodes_per_bit : 7; /* RO */ + unsigned long ni_port : 5; /* RO */ + unsigned long rsvd_62_63 : 2; /* */ + } s2; }; /* ========================================================================= */ @@ -954,18 +1354,38 @@ union uvh_rh_gam_alias210_redirect_config_2_mmr_u { #define UVH_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL #define UVH_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 #define UVH_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL -#define UVH_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12 -#define UVH_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL + +#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 +#define UV1H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL +#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 
+#define UV1H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL +#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_SHFT 12 +#define UV1H_RH_GAM_CONFIG_MMR_MMIOL_CFG_MASK 0x0000000000001000UL + +#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_SHFT 0 +#define UV2H_RH_GAM_CONFIG_MMR_M_SKT_MASK 0x000000000000003fUL +#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_SHFT 6 +#define UV2H_RH_GAM_CONFIG_MMR_N_SKT_MASK 0x00000000000003c0UL union uvh_rh_gam_config_mmr_u { unsigned long v; struct uvh_rh_gam_config_mmr_s { unsigned long m_skt : 6; /* RW */ unsigned long n_skt : 4; /* RW */ + unsigned long rsvd_10_63 : 54; + } s; + struct uv1h_rh_gam_config_mmr_s { + unsigned long m_skt : 6; /* RW */ + unsigned long n_skt : 4; /* RW */ unsigned long rsvd_10_11: 2; /* */ unsigned long mmiol_cfg : 1; /* RW */ unsigned long rsvd_13_63: 51; /* */ - } s; + } s1; + struct uv2h_rh_gam_config_mmr_s { + unsigned long m_skt : 6; /* RW */ + unsigned long n_skt : 4; /* RW */ + unsigned long rsvd_10_63: 54; /* */ + } s2; }; /* ========================================================================= */ @@ -975,25 +1395,49 @@ union uvh_rh_gam_config_mmr_u { #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 #define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 +#define 
UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL union uvh_rh_gam_gru_overlay_config_mmr_u { unsigned long v; struct uvh_rh_gam_gru_overlay_config_mmr_s { unsigned long rsvd_0_27: 28; /* */ unsigned long base : 18; /* RW */ + unsigned long rsvd_46_62 : 17; + unsigned long enable : 1; /* RW */ + } s; + struct uv1h_rh_gam_gru_overlay_config_mmr_s { + unsigned long rsvd_0_27: 28; /* */ + unsigned long base : 18; /* RW */ unsigned long rsvd_46_47: 2; /* */ unsigned long gr4 : 1; /* RW */ unsigned long rsvd_49_51: 3; /* */ unsigned long n_gru : 4; /* RW */ unsigned long rsvd_56_62: 7; /* */ unsigned long enable : 1; /* RW */ - } s; + } s1; + struct uv2h_rh_gam_gru_overlay_config_mmr_s { + unsigned long rsvd_0_27: 28; /* */ + unsigned long base : 18; /* RW */ + unsigned long rsvd_46_51: 6; /* */ + unsigned long n_gru : 4; /* RW */ + unsigned long rsvd_56_62: 7; /* */ + unsigned long enable : 1; /* RW */ + } s2; }; /* ========================================================================= */ @@ -1001,25 +1445,42 @@ union uvh_rh_gam_gru_overlay_config_mmr_u { /* ========================================================================= */ #define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR 0x1600030UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 -#define 
UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 30 +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003fffc0000000UL +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT 27 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff8000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_SHFT 46 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_M_IO_MASK 0x000fc00000000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_SHFT 52 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_N_IO_MASK 0x00f0000000000000UL +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL union uvh_rh_gam_mmioh_overlay_config_mmr_u { unsigned long v; - struct uvh_rh_gam_mmioh_overlay_config_mmr_s { + struct uv1h_rh_gam_mmioh_overlay_config_mmr_s { unsigned long rsvd_0_29: 30; /* */ unsigned long base : 16; /* RW */ unsigned long m_io : 6; /* RW */ unsigned long n_io : 4; /* RW */ unsigned long rsvd_56_62: 7; /* */ unsigned long enable : 1; /* RW */ - } s; + } s1; + struct uv2h_rh_gam_mmioh_overlay_config_mmr_s { + unsigned long rsvd_0_26: 27; /* */ + unsigned long base : 19; /* RW */ + unsigned long m_io : 6; /* RW */ + unsigned long n_io : 
4; /* RW */ + unsigned long rsvd_56_62: 7; /* */ + unsigned long enable : 1; /* RW */ + } s2; }; /* ========================================================================= */ @@ -1029,20 +1490,40 @@ union uvh_rh_gam_mmioh_overlay_config_mmr_u { #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 #define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 -#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV1H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UV2H_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL union uvh_rh_gam_mmr_overlay_config_mmr_u { unsigned long v; struct uvh_rh_gam_mmr_overlay_config_mmr_s { unsigned long rsvd_0_25: 26; /* */ unsigned long base : 20; /* RW */ + unsigned long rsvd_46_62 : 17; + unsigned long enable : 1; /* RW */ + } s; + struct uv1h_rh_gam_mmr_overlay_config_mmr_s { + unsigned long rsvd_0_25: 26; /* */ + unsigned long base : 20; /* RW */ unsigned long dual_hub : 1; /* RW */ unsigned long rsvd_47_62: 16; /* */ unsigned long enable : 1; /* RW */ - } s; + } s1; + struct uv2h_rh_gam_mmr_overlay_config_mmr_s { + unsigned long rsvd_0_25: 26; /* */ + unsigned long base : 20; /* RW */ + unsigned long rsvd_46_62: 17; /* */ + unsigned long 
enable : 1; /* RW */ + } s2; }; /* ========================================================================= */ @@ -1103,10 +1584,11 @@ union uvh_rtc1_int_config_u { /* UVH_SCRATCH5 */ /* ========================================================================= */ #define UVH_SCRATCH5 0x2d0200UL -#define UVH_SCRATCH5_32 0x00778 +#define UVH_SCRATCH5_32 0x778 #define UVH_SCRATCH5_SCRATCH5_SHFT 0 #define UVH_SCRATCH5_SCRATCH5_MASK 0xffffffffffffffffUL + union uvh_scratch5_u { unsigned long v; struct uvh_scratch5_s { @@ -1114,4 +1596,154 @@ union uvh_scratch5_u { } s; }; +/* ========================================================================= */ +/* UV2H_EVENT_OCCURRED2 */ +/* ========================================================================= */ +#define UV2H_EVENT_OCCURRED2 0x70100UL +#define UV2H_EVENT_OCCURRED2_32 0xb68 + +#define UV2H_EVENT_OCCURRED2_RTC_0_SHFT 0 +#define UV2H_EVENT_OCCURRED2_RTC_0_MASK 0x0000000000000001UL +#define UV2H_EVENT_OCCURRED2_RTC_1_SHFT 1 +#define UV2H_EVENT_OCCURRED2_RTC_1_MASK 0x0000000000000002UL +#define UV2H_EVENT_OCCURRED2_RTC_2_SHFT 2 +#define UV2H_EVENT_OCCURRED2_RTC_2_MASK 0x0000000000000004UL +#define UV2H_EVENT_OCCURRED2_RTC_3_SHFT 3 +#define UV2H_EVENT_OCCURRED2_RTC_3_MASK 0x0000000000000008UL +#define UV2H_EVENT_OCCURRED2_RTC_4_SHFT 4 +#define UV2H_EVENT_OCCURRED2_RTC_4_MASK 0x0000000000000010UL +#define UV2H_EVENT_OCCURRED2_RTC_5_SHFT 5 +#define UV2H_EVENT_OCCURRED2_RTC_5_MASK 0x0000000000000020UL +#define UV2H_EVENT_OCCURRED2_RTC_6_SHFT 6 +#define UV2H_EVENT_OCCURRED2_RTC_6_MASK 0x0000000000000040UL +#define UV2H_EVENT_OCCURRED2_RTC_7_SHFT 7 +#define UV2H_EVENT_OCCURRED2_RTC_7_MASK 0x0000000000000080UL +#define UV2H_EVENT_OCCURRED2_RTC_8_SHFT 8 +#define UV2H_EVENT_OCCURRED2_RTC_8_MASK 0x0000000000000100UL +#define UV2H_EVENT_OCCURRED2_RTC_9_SHFT 9 +#define UV2H_EVENT_OCCURRED2_RTC_9_MASK 0x0000000000000200UL +#define UV2H_EVENT_OCCURRED2_RTC_10_SHFT 10 +#define UV2H_EVENT_OCCURRED2_RTC_10_MASK 
0x0000000000000400UL +#define UV2H_EVENT_OCCURRED2_RTC_11_SHFT 11 +#define UV2H_EVENT_OCCURRED2_RTC_11_MASK 0x0000000000000800UL +#define UV2H_EVENT_OCCURRED2_RTC_12_SHFT 12 +#define UV2H_EVENT_OCCURRED2_RTC_12_MASK 0x0000000000001000UL +#define UV2H_EVENT_OCCURRED2_RTC_13_SHFT 13 +#define UV2H_EVENT_OCCURRED2_RTC_13_MASK 0x0000000000002000UL +#define UV2H_EVENT_OCCURRED2_RTC_14_SHFT 14 +#define UV2H_EVENT_OCCURRED2_RTC_14_MASK 0x0000000000004000UL +#define UV2H_EVENT_OCCURRED2_RTC_15_SHFT 15 +#define UV2H_EVENT_OCCURRED2_RTC_15_MASK 0x0000000000008000UL +#define UV2H_EVENT_OCCURRED2_RTC_16_SHFT 16 +#define UV2H_EVENT_OCCURRED2_RTC_16_MASK 0x0000000000010000UL +#define UV2H_EVENT_OCCURRED2_RTC_17_SHFT 17 +#define UV2H_EVENT_OCCURRED2_RTC_17_MASK 0x0000000000020000UL +#define UV2H_EVENT_OCCURRED2_RTC_18_SHFT 18 +#define UV2H_EVENT_OCCURRED2_RTC_18_MASK 0x0000000000040000UL +#define UV2H_EVENT_OCCURRED2_RTC_19_SHFT 19 +#define UV2H_EVENT_OCCURRED2_RTC_19_MASK 0x0000000000080000UL +#define UV2H_EVENT_OCCURRED2_RTC_20_SHFT 20 +#define UV2H_EVENT_OCCURRED2_RTC_20_MASK 0x0000000000100000UL +#define UV2H_EVENT_OCCURRED2_RTC_21_SHFT 21 +#define UV2H_EVENT_OCCURRED2_RTC_21_MASK 0x0000000000200000UL +#define UV2H_EVENT_OCCURRED2_RTC_22_SHFT 22 +#define UV2H_EVENT_OCCURRED2_RTC_22_MASK 0x0000000000400000UL +#define UV2H_EVENT_OCCURRED2_RTC_23_SHFT 23 +#define UV2H_EVENT_OCCURRED2_RTC_23_MASK 0x0000000000800000UL +#define UV2H_EVENT_OCCURRED2_RTC_24_SHFT 24 +#define UV2H_EVENT_OCCURRED2_RTC_24_MASK 0x0000000001000000UL +#define UV2H_EVENT_OCCURRED2_RTC_25_SHFT 25 +#define UV2H_EVENT_OCCURRED2_RTC_25_MASK 0x0000000002000000UL +#define UV2H_EVENT_OCCURRED2_RTC_26_SHFT 26 +#define UV2H_EVENT_OCCURRED2_RTC_26_MASK 0x0000000004000000UL +#define UV2H_EVENT_OCCURRED2_RTC_27_SHFT 27 +#define UV2H_EVENT_OCCURRED2_RTC_27_MASK 0x0000000008000000UL +#define UV2H_EVENT_OCCURRED2_RTC_28_SHFT 28 +#define UV2H_EVENT_OCCURRED2_RTC_28_MASK 0x0000000010000000UL +#define 
UV2H_EVENT_OCCURRED2_RTC_29_SHFT 29 +#define UV2H_EVENT_OCCURRED2_RTC_29_MASK 0x0000000020000000UL +#define UV2H_EVENT_OCCURRED2_RTC_30_SHFT 30 +#define UV2H_EVENT_OCCURRED2_RTC_30_MASK 0x0000000040000000UL +#define UV2H_EVENT_OCCURRED2_RTC_31_SHFT 31 +#define UV2H_EVENT_OCCURRED2_RTC_31_MASK 0x0000000080000000UL + +union uv2h_event_occurred2_u { + unsigned long v; + struct uv2h_event_occurred2_s { + unsigned long rtc_0 : 1; /* RW */ + unsigned long rtc_1 : 1; /* RW */ + unsigned long rtc_2 : 1; /* RW */ + unsigned long rtc_3 : 1; /* RW */ + unsigned long rtc_4 : 1; /* RW */ + unsigned long rtc_5 : 1; /* RW */ + unsigned long rtc_6 : 1; /* RW */ + unsigned long rtc_7 : 1; /* RW */ + unsigned long rtc_8 : 1; /* RW */ + unsigned long rtc_9 : 1; /* RW */ + unsigned long rtc_10 : 1; /* RW */ + unsigned long rtc_11 : 1; /* RW */ + unsigned long rtc_12 : 1; /* RW */ + unsigned long rtc_13 : 1; /* RW */ + unsigned long rtc_14 : 1; /* RW */ + unsigned long rtc_15 : 1; /* RW */ + unsigned long rtc_16 : 1; /* RW */ + unsigned long rtc_17 : 1; /* RW */ + unsigned long rtc_18 : 1; /* RW */ + unsigned long rtc_19 : 1; /* RW */ + unsigned long rtc_20 : 1; /* RW */ + unsigned long rtc_21 : 1; /* RW */ + unsigned long rtc_22 : 1; /* RW */ + unsigned long rtc_23 : 1; /* RW */ + unsigned long rtc_24 : 1; /* RW */ + unsigned long rtc_25 : 1; /* RW */ + unsigned long rtc_26 : 1; /* RW */ + unsigned long rtc_27 : 1; /* RW */ + unsigned long rtc_28 : 1; /* RW */ + unsigned long rtc_29 : 1; /* RW */ + unsigned long rtc_30 : 1; /* RW */ + unsigned long rtc_31 : 1; /* RW */ + unsigned long rsvd_32_63: 32; /* */ + } s1; +}; + +/* ========================================================================= */ +/* UV2H_EVENT_OCCURRED2_ALIAS */ +/* ========================================================================= */ +#define UV2H_EVENT_OCCURRED2_ALIAS 0x70108UL +#define UV2H_EVENT_OCCURRED2_ALIAS_32 0xb70 + +/* ========================================================================= */ 
+/* UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 */ +/* ========================================================================= */ +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2 0x320130UL +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_32 0x9f0 + +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_SHFT 0 +#define UV2H_LB_BAU_SB_ACTIVATION_STATUS_2_AUX_ERROR_MASK 0xffffffffffffffffUL + +union uv2h_lb_bau_sb_activation_status_2_u { + unsigned long v; + struct uv2h_lb_bau_sb_activation_status_2_s { + unsigned long aux_error : 64; /* RW */ + } s1; +}; + +/* ========================================================================= */ +/* UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK */ +/* ========================================================================= */ +#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK 0x320130UL +#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_32 0x9f0 + +#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_SHFT 0 +#define UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK_BIT_ENABLES_MASK 0x00000000ffffffffUL + +union uv1h_lb_target_physical_apic_id_mask_u { + unsigned long v; + struct uv1h_lb_target_physical_apic_id_mask_s { + unsigned long bit_enables : 32; /* RW */ + unsigned long rsvd_32_63 : 32; /* */ + } s1; +}; + + #endif /* __ASM_UV_MMRS_X86_H__ */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f5abe3a245b..90b06d4daee 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -8,6 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities +CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_pvclock.o = -pg @@ -28,6 +29,7 @@ CFLAGS_paravirt.o := $(nostackp) GCOV_PROFILE_vsyscall_64.o := n GCOV_PROFILE_hpet.o := n GCOV_PROFILE_tsc.o := n +GCOV_PROFILE_vread_tsc_64.o := n GCOV_PROFILE_paravirt.o := n # vread_tsc_64 is hot and should be fully optimized: diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c 
b/arch/x86/kernel/apic/x2apic_uv_x.c index f450b683dfc..b511a011b7d 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -91,6 +91,10 @@ static int __init early_get_pnodeid(void) m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR); uv_min_hub_revision_id = node_id.s.revision; + if (node_id.s.part_number == UV2_HUB_PART_NUMBER) + uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1; + + uv_hub_info->hub_revision = uv_min_hub_revision_id; pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1); return pnode; } @@ -112,17 +116,25 @@ static void __init early_get_apic_pnode_shift(void) */ static void __init uv_set_apicid_hibit(void) { - union uvh_lb_target_physical_apic_id_mask_u apicid_mask; + union uv1h_lb_target_physical_apic_id_mask_u apicid_mask; - apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK); - uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK; + if (is_uv1_hub()) { + apicid_mask.v = + uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK); + uv_apicid_hibits = + apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK; + } } static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { - int pnodeid; + int pnodeid, is_uv1, is_uv2; - if (!strcmp(oem_id, "SGI")) { + is_uv1 = !strcmp(oem_id, "SGI"); + is_uv2 = !strcmp(oem_id, "SGI2"); + if (is_uv1 || is_uv2) { + uv_hub_info->hub_revision = + is_uv1 ? 
UV1_HUB_REVISION_BASE : UV2_HUB_REVISION_BASE; pnodeid = early_get_pnodeid(); early_get_apic_pnode_shift(); x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; @@ -484,12 +496,19 @@ static __init void map_mmr_high(int max_pnode) static __init void map_mmioh_high(int max_pnode) { union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; - int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; + int shift; mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); - if (mmioh.s.enable) - map_high("MMIOH", mmioh.s.base, shift, mmioh.s.m_io, + if (is_uv1_hub() && mmioh.s1.enable) { + shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; + map_high("MMIOH", mmioh.s1.base, shift, mmioh.s1.m_io, + max_pnode, map_uc); + } + if (is_uv2_hub() && mmioh.s2.enable) { + shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; + map_high("MMIOH", mmioh.s2.base, shift, mmioh.s2.m_io, max_pnode, map_uc); + } } static __init void map_low_mmrs(void) @@ -736,13 +755,14 @@ void __init uv_system_init(void) unsigned long mmr_base, present, paddr; unsigned short pnode_mask, pnode_io_mask; + printk(KERN_INFO "UV: Found %s hub\n", is_uv1_hub() ? "UV1" : "UV2"); map_low_mmrs(); m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR ); m_val = m_n_config.s.m_skt; n_val = m_n_config.s.n_skt; mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); - n_io = mmioh.s.n_io; + n_io = is_uv1_hub() ? 
mmioh.s1.n_io : mmioh.s2.n_io; mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE; @@ -811,6 +831,8 @@ void __init uv_system_init(void) */ uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift; + uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision; + pnode = uv_apicid_to_pnode(apicid); blade = boot_pnode_to_blade(pnode); lcpu = uv_blade_info[blade].nr_possible_cpus; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 3bfa0223596..965a7666c28 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -361,6 +361,7 @@ struct apm_user { * idle percentage above which bios idle calls are done */ #ifdef CONFIG_APM_CPU_IDLE +#warning deprecated CONFIG_APM_CPU_IDLE will be deleted in 2012 #define DEFAULT_IDLE_THRESHOLD 95 #else #define DEFAULT_IDLE_THRESHOLD 100 @@ -904,6 +905,7 @@ static void apm_cpu_idle(void) unsigned int jiffies_since_last_check = jiffies - last_jiffies; unsigned int bucket; + WARN_ONCE(1, "deprecated apm_cpu_idle will be deleted in 2012"); recalc: if (jiffies_since_last_check > IDLE_CALC_LIMIT) { use_apm_idle = 0; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8f5cabb3c5b..b13ed393dfc 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -612,8 +612,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) } #endif - /* As a rule processors have APIC timer running in deep C states */ - if (c->x86 > 0xf && !cpu_has_amd_erratum(amd_erratum_400)) + /* + * Family 0x12 and above processors have APIC timer + * running in deep C states. 
+ */ + if (c->x86 > 0x11) set_cpu_cap(c, X86_FEATURE_ARAT); /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c39576cb301..525514cf33c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -19,6 +19,7 @@ static int __init no_halt(char *s) { + WARN_ONCE(1, "\"no-hlt\" is deprecated, please use \"idle=poll\"\n"); boot_cpu_data.hlt_works_ok = 0; return 1; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c8b41623377..22a073d7fbf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -477,13 +477,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) if (smp_num_siblings <= 1) goto out; - if (smp_num_siblings > nr_cpu_ids) { - pr_warning("CPU: Unsupported number of siblings %d", - smp_num_siblings); - smp_num_siblings = 1; - return; - } - index_msb = get_count_order(smp_num_siblings); c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); @@ -909,7 +902,7 @@ static void vgetcpu_set_mode(void) void __init identify_boot_cpu(void) { identify_cpu(&boot_cpu_data); - init_c1e_mask(); + init_amd_e400_c1e_mask(); #ifdef CONFIG_X86_32 sysenter_setup(); enable_sep_cpu(); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0ba15a6cc57..c9a281f272f 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -123,7 +123,7 @@ static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) static atomic_t nmi_running = ATOMIC_INIT(0); static int mod_code_status; /* holds return value of text write */ static void *mod_code_ip; /* holds the IP to write to */ -static void *mod_code_newcode; /* holds the text to write to the IP */ +static const void *mod_code_newcode; /* holds the text to write to the IP */ static unsigned nmi_wait_count; static atomic_t nmi_update_count = ATOMIC_INIT(0); @@ -225,7 +225,7 @@ within(unsigned long addr, unsigned long start, unsigned long end) } static int -do_ftrace_mod_code(unsigned long ip, 
void *new_code) +do_ftrace_mod_code(unsigned long ip, const void *new_code) { /* * On x86_64, kernel text mappings are mapped read-only with @@ -266,8 +266,8 @@ static const unsigned char *ftrace_nop_replace(void) } static int -ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code) +ftrace_modify_code(unsigned long ip, unsigned const char *old_code, + unsigned const char *new_code) { unsigned char replaced[MCOUNT_INSN_SIZE]; @@ -301,7 +301,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned char *new, *old; + unsigned const char *new, *old; unsigned long ip = rec->ip; old = ftrace_call_replace(ip, addr); @@ -312,7 +312,7 @@ int ftrace_make_nop(struct module *mod, int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned char *new, *old; + unsigned const char *new, *old; unsigned long ip = rec->ip; old = ftrace_nop_replace(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 88a90a977f8..2e4928d45a2 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -337,7 +337,9 @@ EXPORT_SYMBOL(boot_option_idle_override); * Powermanagement idle function, if any.. 
*/ void (*pm_idle)(void); +#if defined(CONFIG_APM_MODULE) && defined(CONFIG_APM_CPU_IDLE) EXPORT_SYMBOL(pm_idle); +#endif #ifdef CONFIG_X86_32 /* @@ -397,7 +399,7 @@ void default_idle(void) cpu_relax(); } } -#ifdef CONFIG_APM_MODULE +#if defined(CONFIG_APM_MODULE) && defined(CONFIG_APM_CPU_IDLE) EXPORT_SYMBOL(default_idle); #endif @@ -535,45 +537,45 @@ int mwait_usable(const struct cpuinfo_x86 *c) return (edx & MWAIT_EDX_C1); } -bool c1e_detected; -EXPORT_SYMBOL(c1e_detected); +bool amd_e400_c1e_detected; +EXPORT_SYMBOL(amd_e400_c1e_detected); -static cpumask_var_t c1e_mask; +static cpumask_var_t amd_e400_c1e_mask; -void c1e_remove_cpu(int cpu) +void amd_e400_remove_cpu(int cpu) { - if (c1e_mask != NULL) - cpumask_clear_cpu(cpu, c1e_mask); + if (amd_e400_c1e_mask != NULL) + cpumask_clear_cpu(cpu, amd_e400_c1e_mask); } /* - * C1E aware idle routine. We check for C1E active in the interrupt + * AMD Erratum 400 aware idle routine. We check for C1E active in the interrupt * pending message MSR. If we detect C1E, then we handle it the same * way as C3 power states (local apic timer and TSC stop) */ -static void c1e_idle(void) +static void amd_e400_idle(void) { if (need_resched()) return; - if (!c1e_detected) { + if (!amd_e400_c1e_detected) { u32 lo, hi; rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); if (lo & K8_INTP_C1E_ACTIVE_MASK) { - c1e_detected = true; + amd_e400_c1e_detected = true; if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) mark_tsc_unstable("TSC halt in AMD C1E"); printk(KERN_INFO "System has AMD C1E enabled\n"); } } - if (c1e_detected) { + if (amd_e400_c1e_detected) { int cpu = smp_processor_id(); - if (!cpumask_test_cpu(cpu, c1e_mask)) { - cpumask_set_cpu(cpu, c1e_mask); + if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) { + cpumask_set_cpu(cpu, amd_e400_c1e_mask); /* * Force broadcast so ACPI can not interfere. 
*/ @@ -616,17 +618,17 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) pm_idle = mwait_idle; } else if (cpu_has_amd_erratum(amd_erratum_400)) { /* E400: APIC timer interrupt does not wake up CPU from C1e */ - printk(KERN_INFO "using C1E aware idle routine\n"); - pm_idle = c1e_idle; + printk(KERN_INFO "using AMD E400 aware idle routine\n"); + pm_idle = amd_e400_idle; } else pm_idle = default_idle; } -void __init init_c1e_mask(void) +void __init init_amd_e400_c1e_mask(void) { - /* If we're using c1e_idle, we need to allocate c1e_mask. */ - if (pm_idle == c1e_idle) - zalloc_cpumask_var(&c1e_mask, GFP_KERNEL); + /* If we're using amd_e400_idle, we need to allocate amd_e400_c1e_mask. */ + if (pm_idle == amd_e400_idle) + zalloc_cpumask_var(&amd_e400_c1e_mask, GFP_KERNEL); } static int __init idle_setup(char *str) @@ -640,6 +642,7 @@ static int __init idle_setup(char *str) boot_option_idle_override = IDLE_POLL; } else if (!strcmp(str, "mwait")) { boot_option_idle_override = IDLE_FORCE_MWAIT; + WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n"); } else if (!strcmp(str, "halt")) { /* * When the boot option of idle=halt is added, halt is diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a3e5948670c..afaf38447ef 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -910,6 +910,13 @@ void __init setup_arch(char **cmdline_p) memblock.current_limit = get_max_mapped(); memblock_x86_fill(); + /* + * The EFI specification says that boot service code won't be called + * after ExitBootServices(). This is, in fact, a lie. 
+ */ + if (efi_enabled) + efi_reserve_boot_services(); + /* preallocate 4k for mptable mpc */ early_reserve_e820_mpc_new(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a3c430bdfb6..33a0c11797d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1307,7 +1307,7 @@ void play_dead_common(void) { idle_task_exit(); reset_lazy_tlbstate(); - c1e_remove_cpu(raw_smp_processor_id()); + amd_e400_remove_cpu(raw_smp_processor_id()); mb(); /* Ack it */ @@ -1332,7 +1332,7 @@ static inline void mwait_play_dead(void) void *mwait_ptr; struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); - if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)) + if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))) return; if (!this_cpu_has(X86_FEATURE_CLFLSH)) return; diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 32cbffb0c49..fbb0a045a1a 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -345,3 +345,4 @@ ENTRY(sys_call_table) .long sys_clock_adjtime .long sys_syncfs .long sys_sendmmsg /* 345 */ + .long sys_setns diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index e191c096ab9..db832fd65ec 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -993,6 +993,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) static void lguest_time_init(void) { /* Set up the timer interrupt (0) to go to our simple timer routine */ + lguest_setup_irq(0); irq_set_handler(0, lguest_time_irq); clocksource_register_hz(&lguest_clock, NSEC_PER_SEC); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f7a2a054a3c..2dbf6bf4c7e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -823,16 +823,30 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address, force_sig_info_fault(SIGBUS, code, address, tsk, fault); } -static noinline void +static noinline int mm_fault_error(struct pt_regs *regs, unsigned 
long error_code, unsigned long address, unsigned int fault) { + /* + * Pagefault was interrupted by SIGKILL. We have no reason to + * continue pagefault. + */ + if (fatal_signal_pending(current)) { + if (!(fault & VM_FAULT_RETRY)) + up_read(¤t->mm->mmap_sem); + if (!(error_code & PF_USER)) + no_context(regs, error_code, address); + return 1; + } + if (!(fault & VM_FAULT_ERROR)) + return 0; + if (fault & VM_FAULT_OOM) { /* Kernel mode? Handle exceptions or die: */ if (!(error_code & PF_USER)) { up_read(¤t->mm->mmap_sem); no_context(regs, error_code, address); - return; + return 1; } out_of_memory(regs, error_code, address); @@ -843,6 +857,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code, else BUG(); } + return 1; } static int spurious_fault_check(unsigned long error_code, pte_t *pte) @@ -1133,19 +1148,9 @@ good_area: */ fault = handle_mm_fault(mm, vma, address, flags); - if (unlikely(fault & VM_FAULT_ERROR)) { - mm_fault_error(regs, error_code, address, fault); - return; - } - - /* - * Pagefault was interrupted by SIGKILL. We have no reason to - * continue pagefault. 
- */ - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) { - if (!(error_code & PF_USER)) - no_context(regs, error_code, address); - return; + if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { + if (mm_fault_error(regs, error_code, address, fault)) + return; } /* diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index c3b8e24f2b1..9fd8a567fe1 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -316,16 +316,23 @@ static void op_amd_stop_ibs(void) wrmsrl(MSR_AMD64_IBSOPCTL, 0); } -static inline int eilvt_is_available(int offset) +static inline int get_eilvt(int offset) { - /* check if we may assign a vector */ return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1); } +static inline int put_eilvt(int offset) +{ + return !setup_APIC_eilvt(offset, 0, 0, 1); +} + static inline int ibs_eilvt_valid(void) { int offset; u64 val; + int valid = 0; + + preempt_disable(); rdmsrl(MSR_AMD64_IBSCTL, val); offset = val & IBSCTL_LVT_OFFSET_MASK; @@ -333,16 +340,20 @@ static inline int ibs_eilvt_valid(void) if (!(val & IBSCTL_LVT_OFFSET_VALID)) { pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n", smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - return 0; + goto out; } - if (!eilvt_is_available(offset)) { + if (!get_eilvt(offset)) { pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n", smp_processor_id(), offset, MSR_AMD64_IBSCTL, val); - return 0; + goto out; } - return 1; + valid = 1; +out: + preempt_enable(); + + return valid; } static inline int get_ibs_offset(void) @@ -600,67 +611,69 @@ static int setup_ibs_ctl(int ibs_eilvt_off) static int force_ibs_eilvt_setup(void) { - int i; + int offset; int ret; - /* find the next free available EILVT entry */ - for (i = 1; i < 4; i++) { - if (!eilvt_is_available(i)) - continue; - ret = setup_ibs_ctl(i); - if (ret) - return ret; - pr_err(FW_BUG "using offset %d for IBS interrupts\n", i); - 
return 0; + /* + * find the next free available EILVT entry, skip offset 0, + * pin search to this cpu + */ + preempt_disable(); + for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) { + if (get_eilvt(offset)) + break; } + preempt_enable(); - printk(KERN_DEBUG "No EILVT entry available\n"); - - return -EBUSY; -} - -static int __init_ibs_nmi(void) -{ - int ret; - - if (ibs_eilvt_valid()) - return 0; + if (offset == APIC_EILVT_NR_MAX) { + printk(KERN_DEBUG "No EILVT entry available\n"); + return -EBUSY; + } - ret = force_ibs_eilvt_setup(); + ret = setup_ibs_ctl(offset); if (ret) - return ret; + goto out; - if (!ibs_eilvt_valid()) - return -EFAULT; + if (!ibs_eilvt_valid()) { + ret = -EFAULT; + goto out; + } + pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset); pr_err(FW_BUG "workaround enabled for IBS LVT offset\n"); return 0; +out: + preempt_disable(); + put_eilvt(offset); + preempt_enable(); + return ret; } /* * check and reserve APIC extended interrupt LVT offset for IBS if * available - * - * init_ibs() preforms implicitly cpu-local operations, so pin this - * thread to its current CPU */ static void init_ibs(void) { - preempt_disable(); - ibs_caps = get_ibs_caps(); + if (!ibs_caps) + return; + + if (ibs_eilvt_valid()) goto out; - if (__init_ibs_nmi() < 0) - ibs_caps = 0; - else - printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); + if (!force_ibs_eilvt_setup()) + goto out; + + /* Failed to setup ibs */ + ibs_caps = 0; + return; out: - preempt_enable(); + printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps); } static int (*create_arch_files)(struct super_block *sb, struct dentry *root); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index b30aa26a8df..0d3a4fa3456 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -304,6 +304,40 @@ static void __init print_efi_memmap(void) } #endif /* EFI_DEBUG */ +void __init efi_reserve_boot_services(void) +{ + void *p; + + for (p = 
memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + + memblock_x86_reserve_range(start, start + size, "EFI Boot"); + } +} + +static void __init efi_free_boot_services(void) +{ + void *p; + + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + efi_memory_desc_t *md = p; + unsigned long long start = md->phys_addr; + unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) + continue; + + free_bootmem_late(start, size); + } +} + void __init efi_init(void) { efi_config_table_t *config_tables; @@ -536,7 +570,9 @@ void __init efi_enter_virtual_mode(void) for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (!(md->attribute & EFI_MEMORY_RUNTIME)) + if (!(md->attribute & EFI_MEMORY_RUNTIME) && + md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) continue; size = md->num_pages << EFI_PAGE_SHIFT; @@ -593,6 +629,13 @@ void __init efi_enter_virtual_mode(void) } /* + * Thankfully, it does seem that no runtime services other than + * SetVirtualAddressMap() will touch boot services code, so we can + * get rid of it all at this point + */ + efi_free_boot_services(); + + /* * Now that EFI is in virtual mode, update the function * pointers in the runtime service table to the new virtual addresses. 
* diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 2649426a790..ac3aa54e265 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -49,10 +49,11 @@ static void __init early_code_mapping_set_exec(int executable) if (!(__supported_pte_mask & _PAGE_NX)) return; - /* Make EFI runtime service code area executable */ + /* Make EFI service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { md = p; - if (md->type == EFI_RUNTIME_SERVICES_CODE) + if (md->type == EFI_RUNTIME_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_CODE) efi_set_executable(md, executable); } } diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index c58e0ea39ef..68e467f69fe 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1,7 +1,7 @@ /* * SGI UltraViolet TLB flush routines. * - * (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI. + * (c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI. * * This code is released under the GNU General Public License version 2 or * later. 
@@ -35,6 +35,7 @@ static int timeout_base_ns[] = { 5242880, 167772160 }; + static int timeout_us; static int nobau; static int baudisabled; @@ -42,20 +43,70 @@ static spinlock_t disable_lock; static cycles_t congested_cycles; /* tunables: */ -static int max_bau_concurrent = MAX_BAU_CONCURRENT; -static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT; -static int plugged_delay = PLUGGED_DELAY; -static int plugsb4reset = PLUGSB4RESET; -static int timeoutsb4reset = TIMEOUTSB4RESET; -static int ipi_reset_limit = IPI_RESET_LIMIT; -static int complete_threshold = COMPLETE_THRESHOLD; -static int congested_response_us = CONGESTED_RESPONSE_US; -static int congested_reps = CONGESTED_REPS; -static int congested_period = CONGESTED_PERIOD; +static int max_concurr = MAX_BAU_CONCURRENT; +static int max_concurr_const = MAX_BAU_CONCURRENT; +static int plugged_delay = PLUGGED_DELAY; +static int plugsb4reset = PLUGSB4RESET; +static int timeoutsb4reset = TIMEOUTSB4RESET; +static int ipi_reset_limit = IPI_RESET_LIMIT; +static int complete_threshold = COMPLETE_THRESHOLD; +static int congested_respns_us = CONGESTED_RESPONSE_US; +static int congested_reps = CONGESTED_REPS; +static int congested_period = CONGESTED_PERIOD; + +static struct tunables tunables[] = { + {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ + {&plugged_delay, PLUGGED_DELAY}, + {&plugsb4reset, PLUGSB4RESET}, + {&timeoutsb4reset, TIMEOUTSB4RESET}, + {&ipi_reset_limit, IPI_RESET_LIMIT}, + {&complete_threshold, COMPLETE_THRESHOLD}, + {&congested_respns_us, CONGESTED_RESPONSE_US}, + {&congested_reps, CONGESTED_REPS}, + {&congested_period, CONGESTED_PERIOD} +}; + static struct dentry *tunables_dir; static struct dentry *tunables_file; -static int __init setup_nobau(char *arg) +/* these correspond to the statistics printed by ptc_seq_show() */ +static char *stat_description[] = { + "sent: number of shootdown messages sent", + "stime: time spent sending messages", + "numuvhubs: number of hubs targeted with 
shootdown", + "numuvhubs16: number times 16 or more hubs targeted", + "numuvhubs8: number times 8 or more hubs targeted", + "numuvhubs4: number times 4 or more hubs targeted", + "numuvhubs2: number times 2 or more hubs targeted", + "numuvhubs1: number times 1 hub targeted", + "numcpus: number of cpus targeted with shootdown", + "dto: number of destination timeouts", + "retries: destination timeout retries sent", + "rok: : destination timeouts successfully retried", + "resetp: ipi-style resource resets for plugs", + "resett: ipi-style resource resets for timeouts", + "giveup: fall-backs to ipi-style shootdowns", + "sto: number of source timeouts", + "bz: number of stay-busy's", + "throt: number times spun in throttle", + "swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE", + "recv: shootdown messages received", + "rtime: time spent processing messages", + "all: shootdown all-tlb messages", + "one: shootdown one-tlb messages", + "mult: interrupts that found multiple messages", + "none: interrupts that found no messages", + "retry: number of retry messages processed", + "canc: number messages canceled by retries", + "nocan: number retries that found nothing to cancel", + "reset: number of ipi-style reset requests processed", + "rcan: number messages canceled by reset requests", + "disable: number times use of the BAU was disabled", + "enable: number times use of the BAU was re-enabled" +}; + +static int __init +setup_nobau(char *arg) { nobau = 1; return 0; @@ -63,7 +114,7 @@ static int __init setup_nobau(char *arg) early_param("nobau", setup_nobau); /* base pnode in this partition */ -static int uv_partition_base_pnode __read_mostly; +static int uv_base_pnode __read_mostly; /* position of pnode (which is nasid>>1): */ static int uv_nshift __read_mostly; static unsigned long uv_mmask __read_mostly; @@ -109,60 +160,52 @@ static int __init uvhub_to_first_apicid(int uvhub) * clear of the Timeout bit (as well) will free the resource. 
No reply will * be sent (the hardware will only do one reply per message). */ -static inline void uv_reply_to_message(struct msg_desc *mdp, - struct bau_control *bcp) +static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp) { unsigned long dw; - struct bau_payload_queue_entry *msg; + struct bau_pq_entry *msg; msg = mdp->msg; if (!msg->canceled) { - dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) | - msg->sw_ack_vector; - uv_write_local_mmr( - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); + dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec; + write_mmr_sw_ack(dw); } msg->replied_to = 1; - msg->sw_ack_vector = 0; + msg->swack_vec = 0; } /* * Process the receipt of a RETRY message */ -static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, - struct bau_control *bcp) +static void bau_process_retry_msg(struct msg_desc *mdp, + struct bau_control *bcp) { int i; int cancel_count = 0; - int slot2; unsigned long msg_res; unsigned long mmr = 0; - struct bau_payload_queue_entry *msg; - struct bau_payload_queue_entry *msg2; - struct ptc_stats *stat; + struct bau_pq_entry *msg = mdp->msg; + struct bau_pq_entry *msg2; + struct ptc_stats *stat = bcp->statp; - msg = mdp->msg; - stat = bcp->statp; stat->d_retries++; /* * cancel any message from msg+1 to the retry itself */ for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { - if (msg2 > mdp->va_queue_last) - msg2 = mdp->va_queue_first; + if (msg2 > mdp->queue_last) + msg2 = mdp->queue_first; if (msg2 == msg) break; - /* same conditions for cancellation as uv_do_reset */ + /* same conditions for cancellation as do_reset */ if ((msg2->replied_to == 0) && (msg2->canceled == 0) && - (msg2->sw_ack_vector) && ((msg2->sw_ack_vector & - msg->sw_ack_vector) == 0) && + (msg2->swack_vec) && ((msg2->swack_vec & + msg->swack_vec) == 0) && (msg2->sending_cpu == msg->sending_cpu) && (msg2->msg_type != MSG_NOOP)) { - slot2 = msg2 - mdp->va_queue_first; - mmr = uv_read_local_mmr - 
(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); - msg_res = msg2->sw_ack_vector; + mmr = read_mmr_sw_ack(); + msg_res = msg2->swack_vec; /* * This is a message retry; clear the resources held * by the previous message only if they timed out. @@ -170,6 +213,7 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, * situation to report. */ if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { + unsigned long mr; /* * is the resource timed out? * make everyone ignore the cancelled message. @@ -177,10 +221,8 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, msg2->canceled = 1; stat->d_canceled++; cancel_count++; - uv_write_local_mmr( - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, - (msg_res << UV_SW_ACK_NPENDING) | - msg_res); + mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; + write_mmr_sw_ack(mr); } } } @@ -192,20 +234,19 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, * Do all the things a cpu should do for a TLB shootdown message. * Other cpu's may come here at the same time for this message. */ -static void uv_bau_process_message(struct msg_desc *mdp, - struct bau_control *bcp) +static void bau_process_message(struct msg_desc *mdp, + struct bau_control *bcp) { - int msg_ack_count; short socket_ack_count = 0; - struct ptc_stats *stat; - struct bau_payload_queue_entry *msg; + short *sp; + struct atomic_short *asp; + struct ptc_stats *stat = bcp->statp; + struct bau_pq_entry *msg = mdp->msg; struct bau_control *smaster = bcp->socket_master; /* * This must be a normal message, or retry of a normal message */ - msg = mdp->msg; - stat = bcp->statp; if (msg->address == TLB_FLUSH_ALL) { local_flush_tlb(); stat->d_alltlb++; @@ -222,30 +263,32 @@ static void uv_bau_process_message(struct msg_desc *mdp, * cpu number. */ if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) - uv_bau_process_retry_msg(mdp, bcp); + bau_process_retry_msg(mdp, bcp); /* - * This is a sw_ack message, so we have to reply to it. 
+ * This is a swack message, so we have to reply to it. * Count each responding cpu on the socket. This avoids * pinging the count's cache line back and forth between * the sockets. */ - socket_ack_count = atomic_add_short_return(1, (struct atomic_short *) - &smaster->socket_acknowledge_count[mdp->msg_slot]); + sp = &smaster->socket_acknowledge_count[mdp->msg_slot]; + asp = (struct atomic_short *)sp; + socket_ack_count = atom_asr(1, asp); if (socket_ack_count == bcp->cpus_in_socket) { + int msg_ack_count; /* * Both sockets dump their completed count total into * the message's count. */ smaster->socket_acknowledge_count[mdp->msg_slot] = 0; - msg_ack_count = atomic_add_short_return(socket_ack_count, - (struct atomic_short *)&msg->acknowledge_count); + asp = (struct atomic_short *)&msg->acknowledge_count; + msg_ack_count = atom_asr(socket_ack_count, asp); if (msg_ack_count == bcp->cpus_in_uvhub) { /* * All cpus in uvhub saw it; reply */ - uv_reply_to_message(mdp, bcp); + reply_to_message(mdp, bcp); } } @@ -268,62 +311,51 @@ static int uvhub_to_first_cpu(int uvhub) * Last resort when we get a large number of destination timeouts is * to clear resources held by a given cpu. * Do this with IPI so that all messages in the BAU message queue - * can be identified by their nonzero sw_ack_vector field. + * can be identified by their nonzero swack_vec field. * * This is entered for a single cpu on the uvhub. * The sender want's this uvhub to free a specific message's - * sw_ack resources. + * swack resources. 
*/ -static void -uv_do_reset(void *ptr) +static void do_reset(void *ptr) { int i; - int slot; - int count = 0; - unsigned long mmr; - unsigned long msg_res; - struct bau_control *bcp; - struct reset_args *rap; - struct bau_payload_queue_entry *msg; - struct ptc_stats *stat; + struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id()); + struct reset_args *rap = (struct reset_args *)ptr; + struct bau_pq_entry *msg; + struct ptc_stats *stat = bcp->statp; - bcp = &per_cpu(bau_control, smp_processor_id()); - rap = (struct reset_args *)ptr; - stat = bcp->statp; stat->d_resets++; - /* * We're looking for the given sender, and - * will free its sw_ack resource. + * will free its swack resource. * If all cpu's finally responded after the timeout, its * message 'replied_to' was set. */ - for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { - /* uv_do_reset: same conditions for cancellation as - uv_bau_process_retry_msg() */ + for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { + unsigned long msg_res; + /* do_reset: same conditions for cancellation as + bau_process_retry_msg() */ if ((msg->replied_to == 0) && (msg->canceled == 0) && (msg->sending_cpu == rap->sender) && - (msg->sw_ack_vector) && + (msg->swack_vec) && (msg->msg_type != MSG_NOOP)) { + unsigned long mmr; + unsigned long mr; /* * make everyone else ignore this message */ msg->canceled = 1; - slot = msg - bcp->va_queue_first; - count++; /* * only reset the resource if it is still pending */ - mmr = uv_read_local_mmr - (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); - msg_res = msg->sw_ack_vector; + mmr = read_mmr_sw_ack(); + msg_res = msg->swack_vec; + mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; if (mmr & msg_res) { stat->d_rcanceled++; - uv_write_local_mmr( - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, - (msg_res << UV_SW_ACK_NPENDING) | - msg_res); + write_mmr_sw_ack(mr); } } } @@ -334,39 +366,38 @@ uv_do_reset(void *ptr) * Use IPI to get all target uvhubs to release 
resources held by * a given sending cpu number. */ -static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution, - int sender) +static void reset_with_ipi(struct bau_targ_hubmask *distribution, int sender) { int uvhub; - int cpu; + int maskbits; cpumask_t mask; struct reset_args reset_args; reset_args.sender = sender; - cpus_clear(mask); /* find a single cpu for each uvhub in this distribution mask */ - for (uvhub = 0; - uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE; - uvhub++) { + maskbits = sizeof(struct bau_targ_hubmask) * BITSPERBYTE; + for (uvhub = 0; uvhub < maskbits; uvhub++) { + int cpu; if (!bau_uvhub_isset(uvhub, distribution)) continue; /* find a cpu for this uvhub */ cpu = uvhub_to_first_cpu(uvhub); cpu_set(cpu, mask); } - /* IPI all cpus; Preemption is already disabled */ - smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1); + + /* IPI all cpus; preemption is already disabled */ + smp_call_function_many(&mask, do_reset, (void *)&reset_args, 1); return; } -static inline unsigned long -cycles_2_us(unsigned long long cyc) +static inline unsigned long cycles_2_us(unsigned long long cyc) { unsigned long long ns; unsigned long us; - ns = (cyc * per_cpu(cyc2ns, smp_processor_id())) - >> CYC2NS_SCALE_FACTOR; + int cpu = smp_processor_id(); + + ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR; us = ns / 1000; return us; } @@ -376,56 +407,56 @@ cycles_2_us(unsigned long long cyc) * leaves uvhub_quiesce set so that no new broadcasts are started by * bau_flush_send_and_wait() */ -static inline void -quiesce_local_uvhub(struct bau_control *hmaster) +static inline void quiesce_local_uvhub(struct bau_control *hmaster) { - atomic_add_short_return(1, (struct atomic_short *) - &hmaster->uvhub_quiesce); + atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce); } /* * mark this quiet-requestor as done */ -static inline void -end_uvhub_quiesce(struct bau_control *hmaster) +static inline void end_uvhub_quiesce(struct 
bau_control *hmaster) { - atomic_add_short_return(-1, (struct atomic_short *) - &hmaster->uvhub_quiesce); + atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce); +} + +static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift) +{ + unsigned long descriptor_status; + + descriptor_status = uv_read_local_mmr(mmr_offset); + descriptor_status >>= right_shift; + descriptor_status &= UV_ACT_STATUS_MASK; + return descriptor_status; } /* * Wait for completion of a broadcast software ack message * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP */ -static int uv_wait_completion(struct bau_desc *bau_desc, - unsigned long mmr_offset, int right_shift, int this_cpu, - struct bau_control *bcp, struct bau_control *smaster, long try) +static int uv1_wait_completion(struct bau_desc *bau_desc, + unsigned long mmr_offset, int right_shift, + struct bau_control *bcp, long try) { unsigned long descriptor_status; - cycles_t ttime; + cycles_t ttm; struct ptc_stats *stat = bcp->statp; - struct bau_control *hmaster; - - hmaster = bcp->uvhub_master; + descriptor_status = uv1_read_status(mmr_offset, right_shift); /* spin on the status MMR, waiting for it to go idle */ - while ((descriptor_status = (((unsigned long) - uv_read_local_mmr(mmr_offset) >> - right_shift) & UV_ACT_STATUS_MASK)) != - DESC_STATUS_IDLE) { + while ((descriptor_status != DS_IDLE)) { /* - * Our software ack messages may be blocked because there are - * no swack resources available. As long as none of them - * has timed out hardware will NACK our message and its - * state will stay IDLE. + * Our software ack messages may be blocked because + * there are no swack resources available. As long + * as none of them has timed out hardware will NACK + * our message and its state will stay IDLE. 
*/ - if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { + if (descriptor_status == DS_SOURCE_TIMEOUT) { stat->s_stimeout++; return FLUSH_GIVEUP; - } else if (descriptor_status == - DESC_STATUS_DESTINATION_TIMEOUT) { + } else if (descriptor_status == DS_DESTINATION_TIMEOUT) { stat->s_dtimeout++; - ttime = get_cycles(); + ttm = get_cycles(); /* * Our retries may be blocked by all destination @@ -433,8 +464,7 @@ static int uv_wait_completion(struct bau_desc *bau_desc, * pending. In that case hardware returns the * ERROR that looks like a destination timeout. */ - if (cycles_2_us(ttime - bcp->send_message) < - timeout_us) { + if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { bcp->conseccompletes = 0; return FLUSH_RETRY_PLUGGED; } @@ -447,80 +477,160 @@ static int uv_wait_completion(struct bau_desc *bau_desc, */ cpu_relax(); } + descriptor_status = uv1_read_status(mmr_offset, right_shift); } bcp->conseccompletes++; return FLUSH_COMPLETE; } -static inline cycles_t -sec_2_cycles(unsigned long sec) +/* + * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register. 
+ */ +static unsigned long uv2_read_status(unsigned long offset, int rshft, int cpu) { - unsigned long ns; - cycles_t cyc; + unsigned long descriptor_status; + unsigned long descriptor_status2; - ns = sec * 1000000000; - cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); - return cyc; + descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK); + descriptor_status2 = (read_mmr_uv2_status() >> cpu) & 0x1UL; + descriptor_status = (descriptor_status << 1) | descriptor_status2; + return descriptor_status; +} + +static int uv2_wait_completion(struct bau_desc *bau_desc, + unsigned long mmr_offset, int right_shift, + struct bau_control *bcp, long try) +{ + unsigned long descriptor_stat; + cycles_t ttm; + int cpu = bcp->uvhub_cpu; + struct ptc_stats *stat = bcp->statp; + + descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu); + + /* spin on the status MMR, waiting for it to go idle */ + while (descriptor_stat != UV2H_DESC_IDLE) { + /* + * Our software ack messages may be blocked because + * there are no swack resources available. As long + * as none of them has timed out hardware will NACK + * our message and its state will stay IDLE. + */ + if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) || + (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) || + (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) { + stat->s_stimeout++; + return FLUSH_GIVEUP; + } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { + stat->s_dtimeout++; + ttm = get_cycles(); + /* + * Our retries may be blocked by all destination + * swack resources being consumed, and a timeout + * pending. In that case hardware returns the + * ERROR that looks like a destination timeout. 
+ */ + if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { + bcp->conseccompletes = 0; + return FLUSH_RETRY_PLUGGED; + } + bcp->conseccompletes = 0; + return FLUSH_RETRY_TIMEOUT; + } else { + /* + * descriptor_stat is still BUSY + */ + cpu_relax(); + } + descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu); + } + bcp->conseccompletes++; + return FLUSH_COMPLETE; } /* - * conditionally add 1 to *v, unless *v is >= u - * return 0 if we cannot add 1 to *v because it is >= u - * return 1 if we can add 1 to *v because it is < u - * the add is atomic - * - * This is close to atomic_add_unless(), but this allows the 'u' value - * to be lowered below the current 'v'. atomic_add_unless can only stop - * on equal. + * There are 2 status registers; each and array[32] of 2 bits. Set up for + * which register to read and position in that register based on cpu in + * current hub. */ -static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) +static int wait_completion(struct bau_desc *bau_desc, + struct bau_control *bcp, long try) { - spin_lock(lock); - if (atomic_read(v) >= u) { - spin_unlock(lock); - return 0; + int right_shift; + unsigned long mmr_offset; + int cpu = bcp->uvhub_cpu; + + if (cpu < UV_CPUS_PER_AS) { + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; + right_shift = cpu * UV_ACT_STATUS_SIZE; + } else { + mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; + right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE); } - atomic_inc(v); - spin_unlock(lock); - return 1; + + if (is_uv1_hub()) + return uv1_wait_completion(bau_desc, mmr_offset, right_shift, + bcp, try); + else + return uv2_wait_completion(bau_desc, mmr_offset, right_shift, + bcp, try); +} + +static inline cycles_t sec_2_cycles(unsigned long sec) +{ + unsigned long ns; + cycles_t cyc; + + ns = sec * 1000000000; + cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); + return cyc; } /* - * Our retries are blocked by all destination swack resources being + 
* Our retries are blocked by all destination sw ack resources being * in use, and a timeout is pending. In that case hardware immediately * returns the ERROR that looks like a destination timeout. */ -static void -destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp, +static void destination_plugged(struct bau_desc *bau_desc, + struct bau_control *bcp, struct bau_control *hmaster, struct ptc_stats *stat) { udelay(bcp->plugged_delay); bcp->plugged_tries++; + if (bcp->plugged_tries >= bcp->plugsb4reset) { bcp->plugged_tries = 0; + quiesce_local_uvhub(hmaster); + spin_lock(&hmaster->queue_lock); - uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); + reset_with_ipi(&bau_desc->distribution, bcp->cpu); spin_unlock(&hmaster->queue_lock); + end_uvhub_quiesce(hmaster); + bcp->ipi_attempts++; stat->s_resets_plug++; } } -static void -destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, - struct bau_control *hmaster, struct ptc_stats *stat) +static void destination_timeout(struct bau_desc *bau_desc, + struct bau_control *bcp, struct bau_control *hmaster, + struct ptc_stats *stat) { - hmaster->max_bau_concurrent = 1; + hmaster->max_concurr = 1; bcp->timeout_tries++; if (bcp->timeout_tries >= bcp->timeoutsb4reset) { bcp->timeout_tries = 0; + quiesce_local_uvhub(hmaster); + spin_lock(&hmaster->queue_lock); - uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); + reset_with_ipi(&bau_desc->distribution, bcp->cpu); spin_unlock(&hmaster->queue_lock); + end_uvhub_quiesce(hmaster); + bcp->ipi_attempts++; stat->s_resets_timeout++; } @@ -530,34 +640,104 @@ destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, * Completions are taking a very long time due to a congested numalink * network. 
*/ -static void -disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) +static void disable_for_congestion(struct bau_control *bcp, + struct ptc_stats *stat) { - int tcpu; - struct bau_control *tbcp; - /* let only one cpu do this disabling */ spin_lock(&disable_lock); + if (!baudisabled && bcp->period_requests && ((bcp->period_time / bcp->period_requests) > congested_cycles)) { + int tcpu; + struct bau_control *tbcp; /* it becomes this cpu's job to turn on the use of the BAU again */ baudisabled = 1; bcp->set_bau_off = 1; - bcp->set_bau_on_time = get_cycles() + - sec_2_cycles(bcp->congested_period); + bcp->set_bau_on_time = get_cycles(); + bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period); stat->s_bau_disabled++; for_each_present_cpu(tcpu) { tbcp = &per_cpu(bau_control, tcpu); - tbcp->baudisabled = 1; + tbcp->baudisabled = 1; } } + spin_unlock(&disable_lock); } -/** - * uv_flush_send_and_wait - * +static void count_max_concurr(int stat, struct bau_control *bcp, + struct bau_control *hmaster) +{ + bcp->plugged_tries = 0; + bcp->timeout_tries = 0; + if (stat != FLUSH_COMPLETE) + return; + if (bcp->conseccompletes <= bcp->complete_threshold) + return; + if (hmaster->max_concurr >= hmaster->max_concurr_const) + return; + hmaster->max_concurr++; +} + +static void record_send_stats(cycles_t time1, cycles_t time2, + struct bau_control *bcp, struct ptc_stats *stat, + int completion_status, int try) +{ + cycles_t elapsed; + + if (time2 > time1) { + elapsed = time2 - time1; + stat->s_time += elapsed; + + if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { + bcp->period_requests++; + bcp->period_time += elapsed; + if ((elapsed > congested_cycles) && + (bcp->period_requests > bcp->cong_reps)) + disable_for_congestion(bcp, stat); + } + } else + stat->s_requestor--; + + if (completion_status == FLUSH_COMPLETE && try > 1) + stat->s_retriesok++; + else if (completion_status == FLUSH_GIVEUP) + stat->s_giveup++; +} + +/* + * Because of a uv1 hardware 
bug only a limited number of concurrent + * requests can be made. + */ +static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) +{ + spinlock_t *lock = &hmaster->uvhub_lock; + atomic_t *v; + + v = &hmaster->active_descriptor_count; + if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) { + stat->s_throttles++; + do { + cpu_relax(); + } while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)); + } +} + +/* + * Handle the completion status of a message send. + */ +static void handle_cmplt(int completion_status, struct bau_desc *bau_desc, + struct bau_control *bcp, struct bau_control *hmaster, + struct ptc_stats *stat) +{ + if (completion_status == FLUSH_RETRY_PLUGGED) + destination_plugged(bau_desc, bcp, hmaster, stat); + else if (completion_status == FLUSH_RETRY_TIMEOUT) + destination_timeout(bau_desc, bcp, hmaster, stat); +} + +/* * Send a broadcast and wait for it to complete. * * The flush_mask contains the cpus the broadcast is to be sent to including @@ -568,44 +748,23 @@ disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) * returned to the kernel. 
*/ int uv_flush_send_and_wait(struct bau_desc *bau_desc, - struct cpumask *flush_mask, struct bau_control *bcp) + struct cpumask *flush_mask, struct bau_control *bcp) { - int right_shift; - int completion_status = 0; int seq_number = 0; + int completion_stat = 0; long try = 0; - int cpu = bcp->uvhub_cpu; - int this_cpu = bcp->cpu; - unsigned long mmr_offset; unsigned long index; cycles_t time1; cycles_t time2; - cycles_t elapsed; struct ptc_stats *stat = bcp->statp; - struct bau_control *smaster = bcp->socket_master; struct bau_control *hmaster = bcp->uvhub_master; - if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, - &hmaster->active_descriptor_count, - hmaster->max_bau_concurrent)) { - stat->s_throttles++; - do { - cpu_relax(); - } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, - &hmaster->active_descriptor_count, - hmaster->max_bau_concurrent)); - } + if (is_uv1_hub()) + uv1_throttle(hmaster, stat); + while (hmaster->uvhub_quiesce) cpu_relax(); - if (cpu < UV_CPUS_PER_ACT_STATUS) { - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; - right_shift = cpu * UV_ACT_STATUS_SIZE; - } else { - mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; - right_shift = - ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); - } time1 = get_cycles(); do { if (try == 0) { @@ -615,64 +774,134 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc, bau_desc->header.msg_type = MSG_RETRY; stat->s_retry_messages++; } + bau_desc->header.sequence = seq_number; - index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | - bcp->uvhub_cpu; + index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu; bcp->send_message = get_cycles(); - uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); + + write_mmr_activation(index); + try++; - completion_status = uv_wait_completion(bau_desc, mmr_offset, - right_shift, this_cpu, bcp, smaster, try); + completion_stat = wait_completion(bau_desc, bcp, try); + + handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); - if (completion_status == 
FLUSH_RETRY_PLUGGED) { - destination_plugged(bau_desc, bcp, hmaster, stat); - } else if (completion_status == FLUSH_RETRY_TIMEOUT) { - destination_timeout(bau_desc, bcp, hmaster, stat); - } if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { bcp->ipi_attempts = 0; - completion_status = FLUSH_GIVEUP; + completion_stat = FLUSH_GIVEUP; break; } cpu_relax(); - } while ((completion_status == FLUSH_RETRY_PLUGGED) || - (completion_status == FLUSH_RETRY_TIMEOUT)); + } while ((completion_stat == FLUSH_RETRY_PLUGGED) || + (completion_stat == FLUSH_RETRY_TIMEOUT)); + time2 = get_cycles(); - bcp->plugged_tries = 0; - bcp->timeout_tries = 0; - if ((completion_status == FLUSH_COMPLETE) && - (bcp->conseccompletes > bcp->complete_threshold) && - (hmaster->max_bau_concurrent < - hmaster->max_bau_concurrent_constant)) - hmaster->max_bau_concurrent++; + + count_max_concurr(completion_stat, bcp, hmaster); + while (hmaster->uvhub_quiesce) cpu_relax(); + atomic_dec(&hmaster->active_descriptor_count); - if (time2 > time1) { - elapsed = time2 - time1; - stat->s_time += elapsed; - if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { - bcp->period_requests++; - bcp->period_time += elapsed; - if ((elapsed > congested_cycles) && - (bcp->period_requests > bcp->congested_reps)) { - disable_for_congestion(bcp, stat); + + record_send_stats(time1, time2, bcp, stat, completion_stat, try); + + if (completion_stat == FLUSH_GIVEUP) + return 1; + return 0; +} + +/* + * The BAU is disabled. When the disabled time period has expired, the cpu + * that disabled it must re-enable it. + * Return 0 if it is re-enabled for all cpus. 
+ */ +static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) +{ + int tcpu; + struct bau_control *tbcp; + + if (bcp->set_bau_off) { + if (get_cycles() >= bcp->set_bau_on_time) { + stat->s_bau_reenabled++; + baudisabled = 0; + for_each_present_cpu(tcpu) { + tbcp = &per_cpu(bau_control, tcpu); + tbcp->baudisabled = 0; + tbcp->period_requests = 0; + tbcp->period_time = 0; } + return 0; } + } + return -1; +} + +static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs, + int remotes, struct bau_desc *bau_desc) +{ + stat->s_requestor++; + stat->s_ntargcpu += remotes + locals; + stat->s_ntargremotes += remotes; + stat->s_ntarglocals += locals; + + /* uvhub statistics */ + hubs = bau_uvhub_weight(&bau_desc->distribution); + if (locals) { + stat->s_ntarglocaluvhub++; + stat->s_ntargremoteuvhub += (hubs - 1); } else - stat->s_requestor--; - if (completion_status == FLUSH_COMPLETE && try > 1) - stat->s_retriesok++; - else if (completion_status == FLUSH_GIVEUP) { - stat->s_giveup++; - return 1; + stat->s_ntargremoteuvhub += hubs; + + stat->s_ntarguvhub += hubs; + + if (hubs >= 16) + stat->s_ntarguvhub16++; + else if (hubs >= 8) + stat->s_ntarguvhub8++; + else if (hubs >= 4) + stat->s_ntarguvhub4++; + else if (hubs >= 2) + stat->s_ntarguvhub2++; + else + stat->s_ntarguvhub1++; +} + +/* + * Translate a cpu mask to the uvhub distribution mask in the BAU + * activation descriptor. + */ +static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp, + struct bau_desc *bau_desc, int *localsp, int *remotesp) +{ + int cpu; + int pnode; + int cnt = 0; + struct hub_and_pnode *hpp; + + for_each_cpu(cpu, flush_mask) { + /* + * The distribution vector is a bit map of pnodes, relative + * to the partition base pnode (and the partition base nasid + * in the header). + * Translate cpu to pnode and hub using a local memory array. 
+ */ + hpp = &bcp->socket_master->thp[cpu]; + pnode = hpp->pnode - bcp->partition_base_pnode; + bau_uvhub_set(pnode, &bau_desc->distribution); + cnt++; + if (hpp->uvhub == bcp->uvhub) + (*localsp)++; + else + (*remotesp)++; } + if (!cnt) + return 1; return 0; } -/** - * uv_flush_tlb_others - globally purge translation cache of a virtual - * address or all TLB's +/* + * globally purge translation cache of a virtual address or all TLB's * @cpumask: mask of all cpu's in which the address is to be removed * @mm: mm_struct containing virtual address range * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) @@ -696,20 +925,16 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc, * done. The returned pointer is valid till preemption is re-enabled. */ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, - struct mm_struct *mm, - unsigned long va, unsigned int cpu) + struct mm_struct *mm, unsigned long va, + unsigned int cpu) { int locals = 0; int remotes = 0; int hubs = 0; - int tcpu; - int tpnode; struct bau_desc *bau_desc; struct cpumask *flush_mask; struct ptc_stats *stat; struct bau_control *bcp; - struct bau_control *tbcp; - struct hub_and_pnode *hpp; /* kernel was booted 'nobau' */ if (nobau) @@ -720,20 +945,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, /* bau was disabled due to slow response */ if (bcp->baudisabled) { - /* the cpu that disabled it must re-enable it */ - if (bcp->set_bau_off) { - if (get_cycles() >= bcp->set_bau_on_time) { - stat->s_bau_reenabled++; - baudisabled = 0; - for_each_present_cpu(tcpu) { - tbcp = &per_cpu(bau_control, tcpu); - tbcp->baudisabled = 0; - tbcp->period_requests = 0; - tbcp->period_time = 0; - } - } - } - return cpumask; + if (check_enable(bcp, stat)) + return cpumask; } /* @@ -744,59 +957,20 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); /* don't actually 
do a shootdown of the local cpu */ cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); + if (cpu_isset(cpu, *cpumask)) stat->s_ntargself++; bau_desc = bcp->descriptor_base; - bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; + bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu; bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); - - for_each_cpu(tcpu, flush_mask) { - /* - * The distribution vector is a bit map of pnodes, relative - * to the partition base pnode (and the partition base nasid - * in the header). - * Translate cpu to pnode and hub using an array stored - * in local memory. - */ - hpp = &bcp->socket_master->target_hub_and_pnode[tcpu]; - tpnode = hpp->pnode - bcp->partition_base_pnode; - bau_uvhub_set(tpnode, &bau_desc->distribution); - if (hpp->uvhub == bcp->uvhub) - locals++; - else - remotes++; - } - if ((locals + remotes) == 0) + if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) return NULL; - stat->s_requestor++; - stat->s_ntargcpu += remotes + locals; - stat->s_ntargremotes += remotes; - stat->s_ntarglocals += locals; - remotes = bau_uvhub_weight(&bau_desc->distribution); - /* uvhub statistics */ - hubs = bau_uvhub_weight(&bau_desc->distribution); - if (locals) { - stat->s_ntarglocaluvhub++; - stat->s_ntargremoteuvhub += (hubs - 1); - } else - stat->s_ntargremoteuvhub += hubs; - stat->s_ntarguvhub += hubs; - if (hubs >= 16) - stat->s_ntarguvhub16++; - else if (hubs >= 8) - stat->s_ntarguvhub8++; - else if (hubs >= 4) - stat->s_ntarguvhub4++; - else if (hubs >= 2) - stat->s_ntarguvhub2++; - else - stat->s_ntarguvhub1++; + record_send_statistics(stat, locals, hubs, remotes, bau_desc); bau_desc->payload.address = va; bau_desc->payload.sending_cpu = cpu; - /* * uv_flush_send_and_wait returns 0 if all cpu's were messaged, * or 1 if it gave up and the original cpumask should be returned. 
@@ -825,26 +999,31 @@ void uv_bau_message_interrupt(struct pt_regs *regs) { int count = 0; cycles_t time_start; - struct bau_payload_queue_entry *msg; + struct bau_pq_entry *msg; struct bau_control *bcp; struct ptc_stats *stat; struct msg_desc msgdesc; time_start = get_cycles(); + bcp = &per_cpu(bau_control, smp_processor_id()); stat = bcp->statp; - msgdesc.va_queue_first = bcp->va_queue_first; - msgdesc.va_queue_last = bcp->va_queue_last; + + msgdesc.queue_first = bcp->queue_first; + msgdesc.queue_last = bcp->queue_last; + msg = bcp->bau_msg_head; - while (msg->sw_ack_vector) { + while (msg->swack_vec) { count++; - msgdesc.msg_slot = msg - msgdesc.va_queue_first; - msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1; + + msgdesc.msg_slot = msg - msgdesc.queue_first; + msgdesc.swack_slot = ffs(msg->swack_vec) - 1; msgdesc.msg = msg; - uv_bau_process_message(&msgdesc, bcp); + bau_process_message(&msgdesc, bcp); + msg++; - if (msg > msgdesc.va_queue_last) - msg = msgdesc.va_queue_first; + if (msg > msgdesc.queue_last) + msg = msgdesc.queue_first; bcp->bau_msg_head = msg; } stat->d_time += (get_cycles() - time_start); @@ -852,18 +1031,17 @@ void uv_bau_message_interrupt(struct pt_regs *regs) stat->d_nomsg++; else if (count > 1) stat->d_multmsg++; + ack_APIC_irq(); } /* - * uv_enable_timeouts - * - * Each target uvhub (i.e. a uvhub that has no cpu's) needs to have + * Each target uvhub (i.e. a uvhub that has cpu's) needs to have * shootdown message timeouts enabled. The timeout does not cause * an interrupt, but causes an error message to be returned to * the sender. 
*/ -static void __init uv_enable_timeouts(void) +static void __init enable_timeouts(void) { int uvhub; int nuvhubs; @@ -877,47 +1055,44 @@ static void __init uv_enable_timeouts(void) continue; pnode = uv_blade_to_pnode(uvhub); - mmr_image = - uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); + mmr_image = read_mmr_misc_control(pnode); /* * Set the timeout period and then lock it in, in three * steps; captures and locks in the period. * * To program the period, the SOFT_ACK_MODE must be off. */ - mmr_image &= ~((unsigned long)1 << - UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); - uv_write_global_mmr64 - (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); + mmr_image &= ~(1L << SOFTACK_MSHIFT); + write_mmr_misc_control(pnode, mmr_image); /* * Set the 4-bit period. */ - mmr_image &= ~((unsigned long)0xf << - UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); - mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << - UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); - uv_write_global_mmr64 - (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); + mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT); + mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT); + write_mmr_misc_control(pnode, mmr_image); /* + * UV1: * Subsequent reversals of the timebase bit (3) cause an * immediate timeout of one or all INTD resources as * indicated in bits 2:0 (7 causes all of them to timeout). 
*/ - mmr_image |= ((unsigned long)1 << - UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); - uv_write_global_mmr64 - (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); + mmr_image |= (1L << SOFTACK_MSHIFT); + if (is_uv2_hub()) { + mmr_image |= (1L << UV2_LEG_SHFT); + mmr_image |= (1L << UV2_EXT_SHFT); + } + write_mmr_misc_control(pnode, mmr_image); } } -static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) +static void *ptc_seq_start(struct seq_file *file, loff_t *offset) { if (*offset < num_possible_cpus()) return offset; return NULL; } -static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) +static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) { (*offset)++; if (*offset < num_possible_cpus()) @@ -925,12 +1100,11 @@ static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) return NULL; } -static void uv_ptc_seq_stop(struct seq_file *file, void *data) +static void ptc_seq_stop(struct seq_file *file, void *data) { } -static inline unsigned long long -microsec_2_cycles(unsigned long microsec) +static inline unsigned long long usec_2_cycles(unsigned long microsec) { unsigned long ns; unsigned long long cyc; @@ -941,29 +1115,27 @@ microsec_2_cycles(unsigned long microsec) } /* - * Display the statistics thru /proc. + * Display the statistics thru /proc/sgi_uv/ptc_statistics * 'data' points to the cpu number + * Note: see the descriptions in stat_description[]. 
*/ -static int uv_ptc_seq_show(struct seq_file *file, void *data) +static int ptc_seq_show(struct seq_file *file, void *data) { struct ptc_stats *stat; int cpu; cpu = *(loff_t *)data; - if (!cpu) { seq_printf(file, "# cpu sent stime self locals remotes ncpus localhub "); seq_printf(file, "remotehub numuvhubs numuvhubs16 numuvhubs8 "); seq_printf(file, - "numuvhubs4 numuvhubs2 numuvhubs1 dto "); - seq_printf(file, - "retries rok resetp resett giveup sto bz throt "); + "numuvhubs4 numuvhubs2 numuvhubs1 dto retries rok "); seq_printf(file, - "sw_ack recv rtime all "); + "resetp resett giveup sto bz throt swack recv rtime "); seq_printf(file, - "one mult none retry canc nocan reset rcan "); + "all one mult none retry canc nocan reset rcan "); seq_printf(file, "disable enable\n"); } @@ -990,8 +1162,7 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) /* destination side statistics */ seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", - uv_read_global_mmr64(uv_cpu_to_pnode(cpu), - UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), + read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), stat->d_requestee, cycles_2_us(stat->d_time), stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, stat->d_nomsg, stat->d_retries, stat->d_canceled, @@ -1000,7 +1171,6 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) seq_printf(file, "%ld %ld\n", stat->s_bau_disabled, stat->s_bau_reenabled); } - return 0; } @@ -1008,18 +1178,18 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data) * Display the tunables thru debugfs */ static ssize_t tunables_read(struct file *file, char __user *userbuf, - size_t count, loff_t *ppos) + size_t count, loff_t *ppos) { char *buf; int ret; buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", - "max_bau_concurrent plugged_delay plugsb4reset", + "max_concur plugged_delay plugsb4reset", "timeoutsb4reset ipi_reset_limit complete_threshold", "congested_response_us congested_reps congested_period", - max_bau_concurrent, 
plugged_delay, plugsb4reset, + max_concurr, plugged_delay, plugsb4reset, timeoutsb4reset, ipi_reset_limit, complete_threshold, - congested_response_us, congested_reps, congested_period); + congested_respns_us, congested_reps, congested_period); if (!buf) return -ENOMEM; @@ -1030,13 +1200,16 @@ static ssize_t tunables_read(struct file *file, char __user *userbuf, } /* - * -1: resetf the statistics + * handle a write to /proc/sgi_uv/ptc_statistics + * -1: reset the statistics * 0: display meaning of the statistics */ -static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, - size_t count, loff_t *data) +static ssize_t ptc_proc_write(struct file *file, const char __user *user, + size_t count, loff_t *data) { int cpu; + int i; + int elements; long input_arg; char optstr[64]; struct ptc_stats *stat; @@ -1046,79 +1219,18 @@ static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, if (copy_from_user(optstr, user, count)) return -EFAULT; optstr[count - 1] = '\0'; + if (strict_strtol(optstr, 10, &input_arg) < 0) { printk(KERN_DEBUG "%s is invalid\n", optstr); return -EINVAL; } if (input_arg == 0) { + elements = sizeof(stat_description)/sizeof(*stat_description); printk(KERN_DEBUG "# cpu: cpu number\n"); printk(KERN_DEBUG "Sender statistics:\n"); - printk(KERN_DEBUG - "sent: number of shootdown messages sent\n"); - printk(KERN_DEBUG - "stime: time spent sending messages\n"); - printk(KERN_DEBUG - "numuvhubs: number of hubs targeted with shootdown\n"); - printk(KERN_DEBUG - "numuvhubs16: number times 16 or more hubs targeted\n"); - printk(KERN_DEBUG - "numuvhubs8: number times 8 or more hubs targeted\n"); - printk(KERN_DEBUG - "numuvhubs4: number times 4 or more hubs targeted\n"); - printk(KERN_DEBUG - "numuvhubs2: number times 2 or more hubs targeted\n"); - printk(KERN_DEBUG - "numuvhubs1: number times 1 hub targeted\n"); - printk(KERN_DEBUG - "numcpus: number of cpus targeted with shootdown\n"); - printk(KERN_DEBUG - "dto: number of 
destination timeouts\n"); - printk(KERN_DEBUG - "retries: destination timeout retries sent\n"); - printk(KERN_DEBUG - "rok: : destination timeouts successfully retried\n"); - printk(KERN_DEBUG - "resetp: ipi-style resource resets for plugs\n"); - printk(KERN_DEBUG - "resett: ipi-style resource resets for timeouts\n"); - printk(KERN_DEBUG - "giveup: fall-backs to ipi-style shootdowns\n"); - printk(KERN_DEBUG - "sto: number of source timeouts\n"); - printk(KERN_DEBUG - "bz: number of stay-busy's\n"); - printk(KERN_DEBUG - "throt: number times spun in throttle\n"); - printk(KERN_DEBUG "Destination side statistics:\n"); - printk(KERN_DEBUG - "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); - printk(KERN_DEBUG - "recv: shootdown messages received\n"); - printk(KERN_DEBUG - "rtime: time spent processing messages\n"); - printk(KERN_DEBUG - "all: shootdown all-tlb messages\n"); - printk(KERN_DEBUG - "one: shootdown one-tlb messages\n"); - printk(KERN_DEBUG - "mult: interrupts that found multiple messages\n"); - printk(KERN_DEBUG - "none: interrupts that found no messages\n"); - printk(KERN_DEBUG - "retry: number of retry messages processed\n"); - printk(KERN_DEBUG - "canc: number messages canceled by retries\n"); - printk(KERN_DEBUG - "nocan: number retries that found nothing to cancel\n"); - printk(KERN_DEBUG - "reset: number of ipi-style reset requests processed\n"); - printk(KERN_DEBUG - "rcan: number messages canceled by reset requests\n"); - printk(KERN_DEBUG - "disable: number times use of the BAU was disabled\n"); - printk(KERN_DEBUG - "enable: number times use of the BAU was re-enabled\n"); + for (i = 0; i < elements; i++) + printk(KERN_DEBUG "%s\n", stat_description[i]); } else if (input_arg == -1) { for_each_present_cpu(cpu) { stat = &per_cpu(ptcstats, cpu); @@ -1145,27 +1257,18 @@ static int local_atoi(const char *name) } /* - * set the tunables - * 0 values reset them to defaults + * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables. 
+ * Zero values reset them to defaults. */ -static ssize_t tunables_write(struct file *file, const char __user *user, - size_t count, loff_t *data) +static int parse_tunables_write(struct bau_control *bcp, char *instr, + int count) { - int cpu; - int cnt = 0; - int val; char *p; char *q; - char instr[64]; - struct bau_control *bcp; - - if (count == 0 || count > sizeof(instr)-1) - return -EINVAL; - if (copy_from_user(instr, user, count)) - return -EFAULT; + int cnt = 0; + int val; + int e = sizeof(tunables) / sizeof(*tunables); - instr[count] = '\0'; - /* count the fields */ p = instr + strspn(instr, WHITESPACE); q = p; for (; *p; p = q + strspn(q, WHITESPACE)) { @@ -1174,8 +1277,8 @@ static ssize_t tunables_write(struct file *file, const char __user *user, if (q == p) break; } - if (cnt != 9) { - printk(KERN_INFO "bau tunable error: should be 9 numbers\n"); + if (cnt != e) { + printk(KERN_INFO "bau tunable error: should be %d values\n", e); return -EINVAL; } @@ -1187,97 +1290,80 @@ static ssize_t tunables_write(struct file *file, const char __user *user, switch (cnt) { case 0: if (val == 0) { - max_bau_concurrent = MAX_BAU_CONCURRENT; - max_bau_concurrent_constant = - MAX_BAU_CONCURRENT; + max_concurr = MAX_BAU_CONCURRENT; + max_concurr_const = MAX_BAU_CONCURRENT; continue; } - bcp = &per_cpu(bau_control, smp_processor_id()); if (val < 1 || val > bcp->cpus_in_uvhub) { printk(KERN_DEBUG "Error: BAU max concurrent %d is invalid\n", val); return -EINVAL; } - max_bau_concurrent = val; - max_bau_concurrent_constant = val; - continue; - case 1: - if (val == 0) - plugged_delay = PLUGGED_DELAY; - else - plugged_delay = val; - continue; - case 2: - if (val == 0) - plugsb4reset = PLUGSB4RESET; - else - plugsb4reset = val; - continue; - case 3: - if (val == 0) - timeoutsb4reset = TIMEOUTSB4RESET; - else - timeoutsb4reset = val; - continue; - case 4: - if (val == 0) - ipi_reset_limit = IPI_RESET_LIMIT; - else - ipi_reset_limit = val; - continue; - case 5: - if (val == 0) - 
complete_threshold = COMPLETE_THRESHOLD; - else - complete_threshold = val; - continue; - case 6: - if (val == 0) - congested_response_us = CONGESTED_RESPONSE_US; - else - congested_response_us = val; - continue; - case 7: - if (val == 0) - congested_reps = CONGESTED_REPS; - else - congested_reps = val; + max_concurr = val; + max_concurr_const = val; continue; - case 8: + default: if (val == 0) - congested_period = CONGESTED_PERIOD; + *tunables[cnt].tunp = tunables[cnt].deflt; else - congested_period = val; + *tunables[cnt].tunp = val; continue; } if (q == p) break; } + return 0; +} + +/* + * Handle a write to debugfs. (/sys/kernel/debug/sgi_uv/bau_tunables) + */ +static ssize_t tunables_write(struct file *file, const char __user *user, + size_t count, loff_t *data) +{ + int cpu; + int ret; + char instr[100]; + struct bau_control *bcp; + + if (count == 0 || count > sizeof(instr)-1) + return -EINVAL; + if (copy_from_user(instr, user, count)) + return -EFAULT; + + instr[count] = '\0'; + + bcp = &per_cpu(bau_control, smp_processor_id()); + + ret = parse_tunables_write(bcp, instr, count); + if (ret) + return ret; + for_each_present_cpu(cpu) { bcp = &per_cpu(bau_control, cpu); - bcp->max_bau_concurrent = max_bau_concurrent; - bcp->max_bau_concurrent_constant = max_bau_concurrent; - bcp->plugged_delay = plugged_delay; - bcp->plugsb4reset = plugsb4reset; - bcp->timeoutsb4reset = timeoutsb4reset; - bcp->ipi_reset_limit = ipi_reset_limit; - bcp->complete_threshold = complete_threshold; - bcp->congested_response_us = congested_response_us; - bcp->congested_reps = congested_reps; - bcp->congested_period = congested_period; + bcp->max_concurr = max_concurr; + bcp->max_concurr_const = max_concurr; + bcp->plugged_delay = plugged_delay; + bcp->plugsb4reset = plugsb4reset; + bcp->timeoutsb4reset = timeoutsb4reset; + bcp->ipi_reset_limit = ipi_reset_limit; + bcp->complete_threshold = complete_threshold; + bcp->cong_response_us = congested_respns_us; + bcp->cong_reps = 
congested_reps; + bcp->cong_period = congested_period; } return count; } static const struct seq_operations uv_ptc_seq_ops = { - .start = uv_ptc_seq_start, - .next = uv_ptc_seq_next, - .stop = uv_ptc_seq_stop, - .show = uv_ptc_seq_show + .start = ptc_seq_start, + .next = ptc_seq_next, + .stop = ptc_seq_stop, + .show = ptc_seq_show }; -static int uv_ptc_proc_open(struct inode *inode, struct file *file) +static int ptc_proc_open(struct inode *inode, struct file *file) { return seq_open(file, &uv_ptc_seq_ops); } @@ -1288,9 +1374,9 @@ static int tunables_open(struct inode *inode, struct file *file) } static const struct file_operations proc_uv_ptc_operations = { - .open = uv_ptc_proc_open, + .open = ptc_proc_open, .read = seq_read, - .write = uv_ptc_proc_write, + .write = ptc_proc_write, .llseek = seq_lseek, .release = seq_release, }; @@ -1324,7 +1410,7 @@ static int __init uv_ptc_init(void) return -EINVAL; } tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, - tunables_dir, NULL, &tunables_fops); + tunables_dir, NULL, &tunables_fops); if (!tunables_file) { printk(KERN_ERR "unable to create debugfs file %s\n", UV_BAU_TUNABLES_FILE); @@ -1336,24 +1422,24 @@ static int __init uv_ptc_init(void) /* * Initialize the sending side's sending buffers. 
*/ -static void -uv_activation_descriptor_init(int node, int pnode, int base_pnode) +static void activation_descriptor_init(int node, int pnode, int base_pnode) { int i; int cpu; unsigned long pa; unsigned long m; unsigned long n; + size_t dsize; struct bau_desc *bau_desc; struct bau_desc *bd2; struct bau_control *bcp; /* - * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) - * per cpu; and one per cpu on the uvhub (UV_ADP_SIZE) + * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC) + * per cpu; and one per cpu on the uvhub (ADP_SZ) */ - bau_desc = kmalloc_node(sizeof(struct bau_desc) * UV_ADP_SIZE - * UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); + dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC; + bau_desc = kmalloc_node(dsize, GFP_KERNEL, node); BUG_ON(!bau_desc); pa = uv_gpa(bau_desc); /* need the real nasid*/ @@ -1361,27 +1447,25 @@ uv_activation_descriptor_init(int node, int pnode, int base_pnode) m = pa & uv_mmask; /* the 14-bit pnode */ - uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, - (n << UV_DESC_BASE_PNODE_SHIFT | m)); + write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m)); /* - * Initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each + * Initializing all 8 (ITEMS_PER_DESC) descriptors for each * cpu even though we only use the first one; one descriptor can * describe a broadcast to 256 uv hubs. */ - for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); - i++, bd2++) { + for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) { memset(bd2, 0, sizeof(struct bau_desc)); - bd2->header.sw_ack_flag = 1; + bd2->header.swack_flag = 1; /* * The base_dest_nasid set in the message header is the nasid * of the first uvhub in the partition. The bit map will * indicate destination pnode numbers relative to that base. * They may not be consecutive if nasid striding is being used. 
*/ - bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); - bd2->header.dest_subnodeid = UV_LB_SUBNODEID; - bd2->header.command = UV_NET_ENDPOINT_INTD; - bd2->header.int_both = 1; + bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); + bd2->header.dest_subnodeid = UV_LB_SUBNODEID; + bd2->header.command = UV_NET_ENDPOINT_INTD; + bd2->header.int_both = 1; /* * all others need to be set to zero: * fairness chaining multilevel count replied_to @@ -1401,57 +1485,55 @@ uv_activation_descriptor_init(int node, int pnode, int base_pnode) * - node is first node (kernel memory notion) on the uvhub * - pnode is the uvhub's physical identifier */ -static void -uv_payload_queue_init(int node, int pnode) +static void pq_init(int node, int pnode) { - int pn; int cpu; + size_t plsize; char *cp; - unsigned long pa; - struct bau_payload_queue_entry *pqp; - struct bau_payload_queue_entry *pqp_malloc; + void *vp; + unsigned long pn; + unsigned long first; + unsigned long pn_first; + unsigned long last; + struct bau_pq_entry *pqp; struct bau_control *bcp; - pqp = kmalloc_node((DEST_Q_SIZE + 1) - * sizeof(struct bau_payload_queue_entry), - GFP_KERNEL, node); + plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry); + vp = kmalloc_node(plsize, GFP_KERNEL, node); + pqp = (struct bau_pq_entry *)vp; BUG_ON(!pqp); - pqp_malloc = pqp; cp = (char *)pqp + 31; - pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); + pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5); for_each_present_cpu(cpu) { if (pnode != uv_cpu_to_pnode(cpu)) continue; /* for every cpu on this pnode: */ bcp = &per_cpu(bau_control, cpu); - bcp->va_queue_first = pqp; - bcp->bau_msg_head = pqp; - bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1); + bcp->queue_first = pqp; + bcp->bau_msg_head = pqp; + bcp->queue_last = pqp + (DEST_Q_SIZE - 1); } /* * need the pnode of where the memory was really allocated */ - pa = uv_gpa(pqp); - pn = pa >> uv_nshift; - uv_write_global_mmr64(pnode, - 
UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, - ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | - uv_physnodeaddr(pqp)); - uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, - uv_physnodeaddr(pqp)); - uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, - (unsigned long) - uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1))); + pn = uv_gpa(pqp) >> uv_nshift; + first = uv_physnodeaddr(pqp); + pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first; + last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)); + write_mmr_payload_first(pnode, pn_first); + write_mmr_payload_tail(pnode, first); + write_mmr_payload_last(pnode, last); + /* in effect, all msg_type's are set to MSG_NOOP */ - memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); + memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); } /* * Initialization of each UV hub's structures */ -static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) +static void __init init_uvhub(int uvhub, int vector, int base_pnode) { int node; int pnode; @@ -1459,24 +1541,24 @@ static void __init uv_init_uvhub(int uvhub, int vector, int base_pnode) node = uvhub_to_first_node(uvhub); pnode = uv_blade_to_pnode(uvhub); - uv_activation_descriptor_init(node, pnode, base_pnode); - uv_payload_queue_init(node, pnode); + + activation_descriptor_init(node, pnode, base_pnode); + + pq_init(node, pnode); /* * The below initialization can't be in firmware because the * messaging IRQ will be determined by the OS. */ apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; - uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, - ((apicid << 32) | vector)); + write_mmr_data_config(pnode, ((apicid << 32) | vector)); } /* * We will set BAU_MISC_CONTROL with a timeout period. * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. - * So the destination timeout period has be be calculated from them. + * So the destination timeout period has to be calculated from them. 
*/ -static int -calculate_destination_timeout(void) +static int calculate_destination_timeout(void) { unsigned long mmr_image; int mult1; @@ -1486,73 +1568,92 @@ calculate_destination_timeout(void) int ret; unsigned long ts_ns; - mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; - mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); - index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; - mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); - mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; - base = timeout_base_ns[index]; - ts_ns = base * mult1 * mult2; - ret = ts_ns / 1000; + if (is_uv1_hub()) { + mult1 = SOFTACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; + mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); + index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; + mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); + mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; + base = timeout_base_ns[index]; + ts_ns = base * mult1 * mult2; + ret = ts_ns / 1000; + } else { + /* 4 bits 0/1 for 10/80us, 3 bits of multiplier */ + mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); + mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT; + if (mmr_image & (1L << UV2_ACK_UNITS_SHFT)) + mult1 = 80; + else + mult1 = 10; + base = mmr_image & UV2_ACK_MASK; + ret = mult1 * base; + } return ret; } +static void __init init_per_cpu_tunables(void) +{ + int cpu; + struct bau_control *bcp; + + for_each_present_cpu(cpu) { + bcp = &per_cpu(bau_control, cpu); + bcp->baudisabled = 0; + bcp->statp = &per_cpu(ptcstats, cpu); + /* time interval to catch a hardware stay-busy bug */ + bcp->timeout_interval = usec_2_cycles(2*timeout_us); + bcp->max_concurr = max_concurr; + bcp->max_concurr_const = max_concurr; + bcp->plugged_delay = plugged_delay; + bcp->plugsb4reset = plugsb4reset; + bcp->timeoutsb4reset = timeoutsb4reset; + bcp->ipi_reset_limit = ipi_reset_limit; + bcp->complete_threshold = complete_threshold; + 
bcp->cong_response_us = congested_respns_us; + bcp->cong_reps = congested_reps; + bcp->cong_period = congested_period; + } +} + /* - * initialize the bau_control structure for each cpu + * Scan all cpus to collect blade and socket summaries. */ -static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) +static int __init get_cpu_topology(int base_pnode, + struct uvhub_desc *uvhub_descs, + unsigned char *uvhub_mask) { - int i; int cpu; - int tcpu; int pnode; int uvhub; - int have_hmaster; - short socket = 0; - unsigned short socket_mask; - unsigned char *uvhub_mask; + int socket; struct bau_control *bcp; struct uvhub_desc *bdp; struct socket_desc *sdp; - struct bau_control *hmaster = NULL; - struct bau_control *smaster = NULL; - struct socket_desc { - short num_cpus; - short cpu_number[MAX_CPUS_PER_SOCKET]; - }; - struct uvhub_desc { - unsigned short socket_mask; - short num_cpus; - short uvhub; - short pnode; - struct socket_desc socket[2]; - }; - struct uvhub_desc *uvhub_descs; - - timeout_us = calculate_destination_timeout(); - uvhub_descs = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); - memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); - uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); for_each_present_cpu(cpu) { bcp = &per_cpu(bau_control, cpu); + memset(bcp, 0, sizeof(struct bau_control)); + pnode = uv_cpu_hub_info(cpu)->pnode; - if ((pnode - base_part_pnode) >= UV_DISTRIBUTION_SIZE) { + if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) { printk(KERN_EMERG "cpu %d pnode %d-%d beyond %d; BAU disabled\n", - cpu, pnode, base_part_pnode, - UV_DISTRIBUTION_SIZE); + cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE); return 1; } + bcp->osnode = cpu_to_node(cpu); - bcp->partition_base_pnode = uv_partition_base_pnode; + bcp->partition_base_pnode = base_pnode; + uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); bdp = &uvhub_descs[uvhub]; + bdp->num_cpus++; bdp->uvhub = uvhub; bdp->pnode = pnode; + 
/* kludge: 'assuming' one node per socket, and assuming that disabling a socket just leaves a gap in node numbers */ socket = bcp->osnode & 1; @@ -1561,84 +1662,129 @@ static int __init uv_init_per_cpu(int nuvhubs, int base_part_pnode) sdp->cpu_number[sdp->num_cpus] = cpu; sdp->num_cpus++; if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) { - printk(KERN_EMERG "%d cpus per socket invalid\n", sdp->num_cpus); + printk(KERN_EMERG "%d cpus per socket invalid\n", + sdp->num_cpus); return 1; } } + return 0; +} + +/* + * Each socket is to get a local array of pnodes/hubs. + */ +static void make_per_cpu_thp(struct bau_control *smaster) +{ + int cpu; + size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus(); + + smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode); + memset(smaster->thp, 0, hpsz); + for_each_present_cpu(cpu) { + smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode; + smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; + } +} + +/* + * Initialize all the per_cpu information for the cpu's on a given socket, + * given what has been gathered into the socket_desc struct. + * And reports the chosen hub and socket masters back to the caller. 
+ */ +static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp, + struct bau_control **smasterp, + struct bau_control **hmasterp) +{ + int i; + int cpu; + struct bau_control *bcp; + + for (i = 0; i < sdp->num_cpus; i++) { + cpu = sdp->cpu_number[i]; + bcp = &per_cpu(bau_control, cpu); + bcp->cpu = cpu; + if (i == 0) { + *smasterp = bcp; + if (!(*hmasterp)) + *hmasterp = bcp; + } + bcp->cpus_in_uvhub = bdp->num_cpus; + bcp->cpus_in_socket = sdp->num_cpus; + bcp->socket_master = *smasterp; + bcp->uvhub = bdp->uvhub; + bcp->uvhub_master = *hmasterp; + bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; + if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { + printk(KERN_EMERG "%d cpus per uvhub invalid\n", + bcp->uvhub_cpu); + return 1; + } + } + return 0; +} + +/* + * Summarize the blade and socket topology into the per_cpu structures. + */ +static int __init summarize_uvhub_sockets(int nuvhubs, + struct uvhub_desc *uvhub_descs, + unsigned char *uvhub_mask) +{ + int socket; + int uvhub; + unsigned short socket_mask; + for (uvhub = 0; uvhub < nuvhubs; uvhub++) { + struct uvhub_desc *bdp; + struct bau_control *smaster = NULL; + struct bau_control *hmaster = NULL; + if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) continue; - have_hmaster = 0; + bdp = &uvhub_descs[uvhub]; socket_mask = bdp->socket_mask; socket = 0; while (socket_mask) { - if (!(socket_mask & 1)) - goto nextsocket; - sdp = &bdp->socket[socket]; - for (i = 0; i < sdp->num_cpus; i++) { - cpu = sdp->cpu_number[i]; - bcp = &per_cpu(bau_control, cpu); - bcp->cpu = cpu; - if (i == 0) { - smaster = bcp; - if (!have_hmaster) { - have_hmaster++; - hmaster = bcp; - } - } - bcp->cpus_in_uvhub = bdp->num_cpus; - bcp->cpus_in_socket = sdp->num_cpus; - bcp->socket_master = smaster; - bcp->uvhub = bdp->uvhub; - bcp->uvhub_master = hmaster; - bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> - blade_processor_id; - if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { - printk(KERN_EMERG - "%d cpus per uvhub invalid\n", - 
bcp->uvhub_cpu); + struct socket_desc *sdp; + if ((socket_mask & 1)) { + sdp = &bdp->socket[socket]; + if (scan_sock(sdp, bdp, &smaster, &hmaster)) return 1; - } } -nextsocket: socket++; socket_mask = (socket_mask >> 1); - /* each socket gets a local array of pnodes/hubs */ - bcp = smaster; - bcp->target_hub_and_pnode = kmalloc_node( - sizeof(struct hub_and_pnode) * - num_possible_cpus(), GFP_KERNEL, bcp->osnode); - memset(bcp->target_hub_and_pnode, 0, - sizeof(struct hub_and_pnode) * - num_possible_cpus()); - for_each_present_cpu(tcpu) { - bcp->target_hub_and_pnode[tcpu].pnode = - uv_cpu_hub_info(tcpu)->pnode; - bcp->target_hub_and_pnode[tcpu].uvhub = - uv_cpu_hub_info(tcpu)->numa_blade_id; - } + make_per_cpu_thp(smaster); } } + return 0; +} + +/* + * initialize the bau_control structure for each cpu + */ +static int __init init_per_cpu(int nuvhubs, int base_part_pnode) +{ + unsigned char *uvhub_mask; + void *vp; + struct uvhub_desc *uvhub_descs; + + timeout_us = calculate_destination_timeout(); + + vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); + uvhub_descs = (struct uvhub_desc *)vp; + memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); + uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); + + if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask)) + return 1; + + if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask)) + return 1; + kfree(uvhub_descs); kfree(uvhub_mask); - for_each_present_cpu(cpu) { - bcp = &per_cpu(bau_control, cpu); - bcp->baudisabled = 0; - bcp->statp = &per_cpu(ptcstats, cpu); - /* time interval to catch a hardware stay-busy bug */ - bcp->timeout_interval = microsec_2_cycles(2*timeout_us); - bcp->max_bau_concurrent = max_bau_concurrent; - bcp->max_bau_concurrent_constant = max_bau_concurrent; - bcp->plugged_delay = plugged_delay; - bcp->plugsb4reset = plugsb4reset; - bcp->timeoutsb4reset = timeoutsb4reset; - bcp->ipi_reset_limit = ipi_reset_limit; - bcp->complete_threshold = complete_threshold; - 
bcp->congested_response_us = congested_response_us; - bcp->congested_reps = congested_reps; - bcp->congested_period = congested_period; - } + init_per_cpu_tunables(); return 0; } @@ -1651,8 +1797,9 @@ static int __init uv_bau_init(void) int pnode; int nuvhubs; int cur_cpu; + int cpus; int vector; - unsigned long mmr; + cpumask_var_t *mask; if (!is_uv_system()) return 0; @@ -1660,24 +1807,25 @@ static int __init uv_bau_init(void) if (nobau) return 0; - for_each_possible_cpu(cur_cpu) - zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), - GFP_KERNEL, cpu_to_node(cur_cpu)); + for_each_possible_cpu(cur_cpu) { + mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); + } uv_nshift = uv_hub_info->m_val; uv_mmask = (1UL << uv_hub_info->m_val) - 1; nuvhubs = uv_num_possible_blades(); spin_lock_init(&disable_lock); - congested_cycles = microsec_2_cycles(congested_response_us); + congested_cycles = usec_2_cycles(congested_respns_us); - uv_partition_base_pnode = 0x7fffffff; + uv_base_pnode = 0x7fffffff; for (uvhub = 0; uvhub < nuvhubs; uvhub++) { - if (uv_blade_nr_possible_cpus(uvhub) && - (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) - uv_partition_base_pnode = uv_blade_to_pnode(uvhub); + cpus = uv_blade_nr_possible_cpus(uvhub); + if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode)) + uv_base_pnode = uv_blade_to_pnode(uvhub); } - if (uv_init_per_cpu(nuvhubs, uv_partition_base_pnode)) { + if (init_per_cpu(nuvhubs, uv_base_pnode)) { nobau = 1; return 0; } @@ -1685,21 +1833,21 @@ static int __init uv_bau_init(void) vector = UV_BAU_MESSAGE; for_each_possible_blade(uvhub) if (uv_blade_nr_possible_cpus(uvhub)) - uv_init_uvhub(uvhub, vector, uv_partition_base_pnode); + init_uvhub(uvhub, vector, uv_base_pnode); - uv_enable_timeouts(); + enable_timeouts(); alloc_intr_gate(vector, uv_bau_message_intr1); for_each_possible_blade(uvhub) { if (uv_blade_nr_possible_cpus(uvhub)) { + unsigned long val; + unsigned 
long mmr; pnode = uv_blade_to_pnode(uvhub); /* INIT the bau */ - uv_write_global_mmr64(pnode, - UVH_LB_BAU_SB_ACTIVATION_CONTROL, - ((unsigned long)1 << 63)); + val = 1L << 63; + write_gmmr_activation(pnode, val); mmr = 1; /* should be 1 to broadcast to both sockets */ - uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, - mmr); + write_mmr_data_broadcast(pnode, mmr); } } diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c index 0eb90184515..9f29a01ee1b 100644 --- a/arch/x86/platform/uv/uv_time.c +++ b/arch/x86/platform/uv/uv_time.c @@ -99,8 +99,12 @@ static void uv_rtc_send_IPI(int cpu) /* Check for an RTC interrupt pending */ static int uv_intr_pending(int pnode) { - return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & - UVH_EVENT_OCCURRED0_RTC1_MASK; + if (is_uv1_hub()) + return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & + UV1H_EVENT_OCCURRED0_RTC1_MASK; + else + return uv_read_global_mmr64(pnode, UV2H_EVENT_OCCURRED2) & + UV2H_EVENT_OCCURRED2_RTC_1_MASK; } /* Setup interrupt and return non-zero if early expiration occurred. 
*/ @@ -114,8 +118,12 @@ static int uv_setup_intr(int cpu, u64 expires) UVH_RTC1_INT_CONFIG_M_MASK); uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L); - uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, - UVH_EVENT_OCCURRED0_RTC1_MASK); + if (is_uv1_hub()) + uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, + UV1H_EVENT_OCCURRED0_RTC1_MASK); + else + uv_write_global_mmr64(pnode, UV2H_EVENT_OCCURRED2_ALIAS, + UV2H_EVENT_OCCURRED2_RTC_1_MASK); val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); diff --git a/arch/xtensa/include/asm/unistd.h b/arch/xtensa/include/asm/unistd.h index 528042c2951..a6f934f37f1 100644 --- a/arch/xtensa/include/asm/unistd.h +++ b/arch/xtensa/include/asm/unistd.h @@ -683,8 +683,10 @@ __SYSCALL(305, sys_ni_syscall, 0) __SYSCALL(306, sys_eventfd, 1) #define __NR_recvmmsg 307 __SYSCALL(307, sys_recvmmsg, 5) +#define __NR_setns 308 +__SYSCALL(308, sys_setns, 2) -#define __NR_syscall_count 308 +#define __NR_syscall_count 309 /* * sysxtensa syscall handler diff --git a/drivers/Makefile b/drivers/Makefile index 6b17f586434..09f3232bcdc 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -17,6 +17,9 @@ obj-$(CONFIG_SFI) += sfi/ # was used and do nothing if so obj-$(CONFIG_PNP) += pnp/ obj-$(CONFIG_ARM_AMBA) += amba/ +# Many drivers will want to use DMA so this has to be made available +# really early. 
+obj-$(CONFIG_DMA_ENGINE) += dma/ obj-$(CONFIG_VIRTIO) += virtio/ obj-$(CONFIG_XEN) += xen/ @@ -92,7 +95,6 @@ obj-$(CONFIG_EISA) += eisa/ obj-y += lguest/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_CPU_IDLE) += cpuidle/ -obj-$(CONFIG_DMA_ENGINE) += dma/ obj-$(CONFIG_MMC) += mmc/ obj-$(CONFIG_MEMSTICK) += memstick/ obj-y += leds/ diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index bc2218db5ba..de0e3df7677 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -369,6 +369,21 @@ config ACPI_HED which is used to report some hardware errors notified via SCI, mainly the corrected errors. +config ACPI_CUSTOM_METHOD + tristate "Allow ACPI methods to be inserted/replaced at run time" + depends on DEBUG_FS + default n + help + This debug facility allows ACPI AML methods to be inserted and/or + replaced without rebooting the system. For details refer to: + Documentation/acpi/method-customizing.txt. + + NOTE: This option is security sensitive, because it allows arbitrary + kernel memory to be written to by root (uid=0) users, allowing them + to bypass certain security measures (e.g. if root is not allowed to + load additional kernel modules after boot, this feature may be used + to override that restriction). + source "drivers/acpi/apei/Kconfig" endif # ACPI diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index b66fbb2fc85..ecb26b4f29a 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_ACPI_SBS) += sbshc.o obj-$(CONFIG_ACPI_SBS) += sbs.o obj-$(CONFIG_ACPI_HED) += hed.o obj-$(CONFIG_ACPI_EC_DEBUGFS) += ec_sys.o +obj-$(CONFIG_ACPI_CUSTOM_METHOD)+= custom_method.o # processor has its own "processor."
module_param namespace processor-y := processor_driver.o processor_throttling.o diff --git a/drivers/acpi/acpica/Makefile b/drivers/acpi/acpica/Makefile index a1224712fd0..301bd2d388a 100644 --- a/drivers/acpi/acpica/Makefile +++ b/drivers/acpi/acpica/Makefile @@ -14,7 +14,7 @@ acpi-y := dsfield.o dsmthdat.o dsopcode.o dswexec.o dswscope.o \ acpi-y += evevent.o evregion.o evsci.o evxfevnt.o \ evmisc.o evrgnini.o evxface.o evxfregn.o \ - evgpe.o evgpeblk.o evgpeinit.o evgpeutil.o evxfgpe.o + evgpe.o evgpeblk.o evgpeinit.o evgpeutil.o evxfgpe.o evglock.o acpi-y += exconfig.o exfield.o exnames.o exoparg6.o exresolv.o exstorob.o\ exconvrt.o exfldio.o exoparg1.o exprep.o exresop.o exsystem.o\ diff --git a/drivers/acpi/acpica/acconfig.h b/drivers/acpi/acpica/acconfig.h index ab87396c2c0..bc533dde16c 100644 --- a/drivers/acpi/acpica/acconfig.h +++ b/drivers/acpi/acpica/acconfig.h @@ -187,7 +187,6 @@ /* Operation regions */ -#define ACPI_NUM_PREDEFINED_REGIONS 9 #define ACPI_USER_REGION_BEGIN 0x80 /* Maximum space_ids for Operation Regions */ diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index 41d247daf46..bea3b489918 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -58,12 +58,6 @@ u32 acpi_ev_fixed_event_detect(void); */ u8 acpi_ev_is_notify_object(struct acpi_namespace_node *node); -acpi_status acpi_ev_acquire_global_lock(u16 timeout); - -acpi_status acpi_ev_release_global_lock(void); - -acpi_status acpi_ev_init_global_lock_handler(void); - u32 acpi_ev_get_gpe_number_index(u32 gpe_number); acpi_status @@ -71,6 +65,17 @@ acpi_ev_queue_notify_request(struct acpi_namespace_node *node, u32 notify_value); /* + * evglock - Global Lock support + */ +acpi_status acpi_ev_init_global_lock_handler(void); + +acpi_status acpi_ev_acquire_global_lock(u16 timeout); + +acpi_status acpi_ev_release_global_lock(void); + +acpi_status acpi_ev_remove_global_lock_handler(void); + +/* * evgpe - Low-level GPE support */ u32 
acpi_ev_gpe_detect(struct acpi_gpe_xrupt_info *gpe_xrupt_list); diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index d69750b83b3..73863d86f02 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -214,24 +214,23 @@ ACPI_EXTERN struct acpi_mutex_info acpi_gbl_mutex_info[ACPI_NUM_MUTEX]; /* * Global lock mutex is an actual AML mutex object - * Global lock semaphore works in conjunction with the HW global lock + * Global lock semaphore works in conjunction with the actual global lock + * Global lock spinlock is used for "pending" handshake */ ACPI_EXTERN union acpi_operand_object *acpi_gbl_global_lock_mutex; ACPI_EXTERN acpi_semaphore acpi_gbl_global_lock_semaphore; +ACPI_EXTERN acpi_spinlock acpi_gbl_global_lock_pending_lock; ACPI_EXTERN u16 acpi_gbl_global_lock_handle; ACPI_EXTERN u8 acpi_gbl_global_lock_acquired; ACPI_EXTERN u8 acpi_gbl_global_lock_present; +ACPI_EXTERN u8 acpi_gbl_global_lock_pending; /* * Spinlocks are used for interfaces that can be possibly called at * interrupt level */ -ACPI_EXTERN spinlock_t _acpi_gbl_gpe_lock; /* For GPE data structs and registers */ -ACPI_EXTERN spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ -ACPI_EXTERN spinlock_t _acpi_ev_global_lock_pending_lock; /* For global lock */ -#define acpi_gbl_gpe_lock &_acpi_gbl_gpe_lock -#define acpi_gbl_hardware_lock &_acpi_gbl_hardware_lock -#define acpi_ev_global_lock_pending_lock &_acpi_ev_global_lock_pending_lock +ACPI_EXTERN acpi_spinlock acpi_gbl_gpe_lock; /* For GPE data structs and registers */ +ACPI_EXTERN acpi_spinlock acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ /***************************************************************************** * diff --git a/drivers/acpi/acpica/amlcode.h b/drivers/acpi/acpica/amlcode.h index f4f0998d396..1077f17859e 100644 --- a/drivers/acpi/acpica/amlcode.h +++ b/drivers/acpi/acpica/amlcode.h @@ -394,21 +394,6 @@ #define AML_CLASS_METHOD_CALL 
0x09 #define AML_CLASS_UNKNOWN 0x0A -/* Predefined Operation Region space_iDs */ - -typedef enum { - REGION_MEMORY = 0, - REGION_IO, - REGION_PCI_CONFIG, - REGION_EC, - REGION_SMBUS, - REGION_CMOS, - REGION_PCI_BAR, - REGION_IPMI, - REGION_DATA_TABLE, /* Internal use only */ - REGION_FIXED_HW = 0x7F -} AML_REGION_TYPES; - /* Comparison operation codes for match_op operator */ typedef enum { diff --git a/drivers/acpi/acpica/dswload.c b/drivers/acpi/acpica/dswload.c index 23a3b1ab20c..324acec1179 100644 --- a/drivers/acpi/acpica/dswload.c +++ b/drivers/acpi/acpica/dswload.c @@ -450,7 +450,7 @@ acpi_status acpi_ds_load1_end_op(struct acpi_walk_state *walk_state) status = acpi_ex_create_region(op->named.data, op->named.length, - REGION_DATA_TABLE, + ACPI_ADR_SPACE_DATA_TABLE, walk_state); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); diff --git a/drivers/acpi/acpica/dswload2.c b/drivers/acpi/acpica/dswload2.c index 4be4e921dfe..976318138c5 100644 --- a/drivers/acpi/acpica/dswload2.c +++ b/drivers/acpi/acpica/dswload2.c @@ -562,7 +562,7 @@ acpi_status acpi_ds_load2_end_op(struct acpi_walk_state *walk_state) ((op->common.value.arg)->common.value. integer); } else { - region_space = REGION_DATA_TABLE; + region_space = ACPI_ADR_SPACE_DATA_TABLE; } /* diff --git a/drivers/acpi/acpica/evglock.c b/drivers/acpi/acpica/evglock.c new file mode 100644 index 00000000000..56a562a1e5d --- /dev/null +++ b/drivers/acpi/acpica/evglock.c @@ -0,0 +1,335 @@ +/****************************************************************************** + * + * Module Name: evglock - Global Lock support + * + *****************************************************************************/ + +/* + * Copyright (C) 2000 - 2011, Intel Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. 
+ */ + +#include <acpi/acpi.h> +#include "accommon.h" +#include "acevents.h" +#include "acinterp.h" + +#define _COMPONENT ACPI_EVENTS +ACPI_MODULE_NAME("evglock") + +/* Local prototypes */ +static u32 acpi_ev_global_lock_handler(void *context); + +/******************************************************************************* + * + * FUNCTION: acpi_ev_init_global_lock_handler + * + * PARAMETERS: None + * + * RETURN: Status + * + * DESCRIPTION: Install a handler for the global lock release event + * + ******************************************************************************/ + +acpi_status acpi_ev_init_global_lock_handler(void) +{ + acpi_status status; + + ACPI_FUNCTION_TRACE(ev_init_global_lock_handler); + + /* Attempt installation of the global lock handler */ + + status = acpi_install_fixed_event_handler(ACPI_EVENT_GLOBAL, + acpi_ev_global_lock_handler, + NULL); + + /* + * If the global lock does not exist on this platform, the attempt to + * enable GBL_STATUS will fail (the GBL_ENABLE bit will not stick). + * Map to AE_OK, but mark global lock as not present. Any attempt to + * actually use the global lock will be flagged with an error. 
+ */ + acpi_gbl_global_lock_present = FALSE; + if (status == AE_NO_HARDWARE_RESPONSE) { + ACPI_ERROR((AE_INFO, + "No response from Global Lock hardware, disabling lock")); + + return_ACPI_STATUS(AE_OK); + } + + status = acpi_os_create_lock(&acpi_gbl_global_lock_pending_lock); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + acpi_gbl_global_lock_pending = FALSE; + acpi_gbl_global_lock_present = TRUE; + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ev_remove_global_lock_handler + * + * PARAMETERS: None + * + * RETURN: Status + * + * DESCRIPTION: Remove the handler for the Global Lock + * + ******************************************************************************/ + +acpi_status acpi_ev_remove_global_lock_handler(void) +{ + acpi_status status; + + ACPI_FUNCTION_TRACE(ev_remove_global_lock_handler); + + acpi_gbl_global_lock_present = FALSE; + status = acpi_remove_fixed_event_handler(ACPI_EVENT_GLOBAL, + acpi_ev_global_lock_handler); + + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ev_global_lock_handler + * + * PARAMETERS: Context - From thread interface, not used + * + * RETURN: ACPI_INTERRUPT_HANDLED + * + * DESCRIPTION: Invoked directly from the SCI handler when a global lock + * release interrupt occurs. If there is actually a pending + * request for the lock, signal the waiting thread. + * + ******************************************************************************/ + +static u32 acpi_ev_global_lock_handler(void *context) +{ + acpi_status status; + acpi_cpu_flags flags; + + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); + + /* + * If a request for the global lock is not actually pending, + * we are done. This handles "spurious" global lock interrupts + * which are possible (and have been seen) with bad BIOSs. 
+ */ + if (!acpi_gbl_global_lock_pending) { + goto cleanup_and_exit; + } + + /* + * Send a unit to the global lock semaphore. The actual acquisition + * of the global lock will be performed by the waiting thread. + */ + status = acpi_os_signal_semaphore(acpi_gbl_global_lock_semaphore, 1); + if (ACPI_FAILURE(status)) { + ACPI_ERROR((AE_INFO, "Could not signal Global Lock semaphore")); + } + + acpi_gbl_global_lock_pending = FALSE; + + cleanup_and_exit: + + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); + return (ACPI_INTERRUPT_HANDLED); +} + +/****************************************************************************** + * + * FUNCTION: acpi_ev_acquire_global_lock + * + * PARAMETERS: Timeout - Max time to wait for the lock, in millisec. + * + * RETURN: Status + * + * DESCRIPTION: Attempt to gain ownership of the Global Lock. + * + * MUTEX: Interpreter must be locked + * + * Note: The original implementation allowed multiple threads to "acquire" the + * Global Lock, and the OS would hold the lock until the last thread had + * released it. However, this could potentially starve the BIOS out of the + * lock, especially in the case where there is a tight handshake between the + * Embedded Controller driver and the BIOS. Therefore, this implementation + * allows only one thread to acquire the HW Global Lock at a time, and makes + * the global lock appear as a standard mutex on the OS side. + * + *****************************************************************************/ + +acpi_status acpi_ev_acquire_global_lock(u16 timeout) +{ + acpi_cpu_flags flags; + acpi_status status; + u8 acquired = FALSE; + + ACPI_FUNCTION_TRACE(ev_acquire_global_lock); + + /* + * Only one thread can acquire the GL at a time, the global_lock_mutex + * enforces this. This interface releases the interpreter if we must wait. + */ + status = + acpi_ex_system_wait_mutex(acpi_gbl_global_lock_mutex->mutex. 
+ os_mutex, timeout); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* + * Update the global lock handle and check for wraparound. The handle is + * only used for the external global lock interfaces, but it is updated + * here to properly handle the case where a single thread may acquire the + * lock via both the AML and the acpi_acquire_global_lock interfaces. The + * handle is therefore updated on the first acquire from a given thread + * regardless of where the acquisition request originated. + */ + acpi_gbl_global_lock_handle++; + if (acpi_gbl_global_lock_handle == 0) { + acpi_gbl_global_lock_handle = 1; + } + + /* + * Make sure that a global lock actually exists. If not, just + * treat the lock as a standard mutex. + */ + if (!acpi_gbl_global_lock_present) { + acpi_gbl_global_lock_acquired = TRUE; + return_ACPI_STATUS(AE_OK); + } + + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); + + do { + + /* Attempt to acquire the actual hardware lock */ + + ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired); + if (acquired) { + acpi_gbl_global_lock_acquired = TRUE; + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Acquired hardware Global Lock\n")); + break; + } + + /* + * Did not get the lock. The pending bit was set above, and + * we must now wait until we receive the global lock + * released interrupt. + */ + acpi_gbl_global_lock_pending = TRUE; + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); + + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Waiting for hardware Global Lock\n")); + + /* + * Wait for handshake with the global lock interrupt handler. + * This interface releases the interpreter if we must wait. 
+ */ + status = + acpi_ex_system_wait_semaphore + (acpi_gbl_global_lock_semaphore, ACPI_WAIT_FOREVER); + + flags = acpi_os_acquire_lock(acpi_gbl_global_lock_pending_lock); + + } while (ACPI_SUCCESS(status)); + + acpi_gbl_global_lock_pending = FALSE; + acpi_os_release_lock(acpi_gbl_global_lock_pending_lock, flags); + + return_ACPI_STATUS(status); +} + +/******************************************************************************* + * + * FUNCTION: acpi_ev_release_global_lock + * + * PARAMETERS: None + * + * RETURN: Status + * + * DESCRIPTION: Releases ownership of the Global Lock. + * + ******************************************************************************/ + +acpi_status acpi_ev_release_global_lock(void) +{ + u8 pending = FALSE; + acpi_status status = AE_OK; + + ACPI_FUNCTION_TRACE(ev_release_global_lock); + + /* Lock must be already acquired */ + + if (!acpi_gbl_global_lock_acquired) { + ACPI_WARNING((AE_INFO, + "Cannot release the ACPI Global Lock, it has not been acquired")); + return_ACPI_STATUS(AE_NOT_ACQUIRED); + } + + if (acpi_gbl_global_lock_present) { + + /* Allow any thread to release the lock */ + + ACPI_RELEASE_GLOBAL_LOCK(acpi_gbl_FACS, pending); + + /* + * If the pending bit was set, we must write GBL_RLS to the control + * register + */ + if (pending) { + status = + acpi_write_bit_register + (ACPI_BITREG_GLOBAL_LOCK_RELEASE, + ACPI_ENABLE_EVENT); + } + + ACPI_DEBUG_PRINT((ACPI_DB_EXEC, + "Released hardware Global Lock\n")); + } + + acpi_gbl_global_lock_acquired = FALSE; + + /* Release the local GL mutex */ + + acpi_os_release_mutex(acpi_gbl_global_lock_mutex->mutex.os_mutex); + return_ACPI_STATUS(status); +} diff --git a/drivers/acpi/acpica/evmisc.c b/drivers/acpi/acpica/evmisc.c index 7dc80946f7b..d0b33184442 100644 --- a/drivers/acpi/acpica/evmisc.c +++ b/drivers/acpi/acpica/evmisc.c @@ -45,7 +45,6 @@ #include "accommon.h" #include "acevents.h" #include "acnamesp.h" -#include "acinterp.h" #define _COMPONENT ACPI_EVENTS 
ACPI_MODULE_NAME("evmisc") @@ -53,10 +52,6 @@ ACPI_MODULE_NAME("evmisc") /* Local prototypes */ static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context); -static u32 acpi_ev_global_lock_handler(void *context); - -static acpi_status acpi_ev_remove_global_lock_handler(void); - /******************************************************************************* * * FUNCTION: acpi_ev_is_notify_object @@ -275,304 +270,6 @@ static void ACPI_SYSTEM_XFACE acpi_ev_notify_dispatch(void *context) acpi_ut_delete_generic_state(notify_info); } -/******************************************************************************* - * - * FUNCTION: acpi_ev_global_lock_handler - * - * PARAMETERS: Context - From thread interface, not used - * - * RETURN: ACPI_INTERRUPT_HANDLED - * - * DESCRIPTION: Invoked directly from the SCI handler when a global lock - * release interrupt occurs. If there's a thread waiting for - * the global lock, signal it. - * - * NOTE: Assumes that the semaphore can be signaled from interrupt level. If - * this is not possible for some reason, a separate thread will have to be - * scheduled to do this. 
- * - ******************************************************************************/ -static u8 acpi_ev_global_lock_pending; - -static u32 acpi_ev_global_lock_handler(void *context) -{ - acpi_status status; - acpi_cpu_flags flags; - - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); - - if (!acpi_ev_global_lock_pending) { - goto out; - } - - /* Send a unit to the semaphore */ - - status = acpi_os_signal_semaphore(acpi_gbl_global_lock_semaphore, 1); - if (ACPI_FAILURE(status)) { - ACPI_ERROR((AE_INFO, "Could not signal Global Lock semaphore")); - } - - acpi_ev_global_lock_pending = FALSE; - - out: - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); - - return (ACPI_INTERRUPT_HANDLED); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ev_init_global_lock_handler - * - * PARAMETERS: None - * - * RETURN: Status - * - * DESCRIPTION: Install a handler for the global lock release event - * - ******************************************************************************/ - -acpi_status acpi_ev_init_global_lock_handler(void) -{ - acpi_status status; - - ACPI_FUNCTION_TRACE(ev_init_global_lock_handler); - - /* Attempt installation of the global lock handler */ - - status = acpi_install_fixed_event_handler(ACPI_EVENT_GLOBAL, - acpi_ev_global_lock_handler, - NULL); - - /* - * If the global lock does not exist on this platform, the attempt to - * enable GBL_STATUS will fail (the GBL_ENABLE bit will not stick). - * Map to AE_OK, but mark global lock as not present. Any attempt to - * actually use the global lock will be flagged with an error. 
- */ - if (status == AE_NO_HARDWARE_RESPONSE) { - ACPI_ERROR((AE_INFO, - "No response from Global Lock hardware, disabling lock")); - - acpi_gbl_global_lock_present = FALSE; - return_ACPI_STATUS(AE_OK); - } - - acpi_gbl_global_lock_present = TRUE; - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ev_remove_global_lock_handler - * - * PARAMETERS: None - * - * RETURN: Status - * - * DESCRIPTION: Remove the handler for the Global Lock - * - ******************************************************************************/ - -static acpi_status acpi_ev_remove_global_lock_handler(void) -{ - acpi_status status; - - ACPI_FUNCTION_TRACE(ev_remove_global_lock_handler); - - acpi_gbl_global_lock_present = FALSE; - status = acpi_remove_fixed_event_handler(ACPI_EVENT_GLOBAL, - acpi_ev_global_lock_handler); - - return_ACPI_STATUS(status); -} - -/****************************************************************************** - * - * FUNCTION: acpi_ev_acquire_global_lock - * - * PARAMETERS: Timeout - Max time to wait for the lock, in millisec. - * - * RETURN: Status - * - * DESCRIPTION: Attempt to gain ownership of the Global Lock. - * - * MUTEX: Interpreter must be locked - * - * Note: The original implementation allowed multiple threads to "acquire" the - * Global Lock, and the OS would hold the lock until the last thread had - * released it. However, this could potentially starve the BIOS out of the - * lock, especially in the case where there is a tight handshake between the - * Embedded Controller driver and the BIOS. Therefore, this implementation - * allows only one thread to acquire the HW Global Lock at a time, and makes - * the global lock appear as a standard mutex on the OS side. 
- * - *****************************************************************************/ -static acpi_thread_id acpi_ev_global_lock_thread_id; -static int acpi_ev_global_lock_acquired; - -acpi_status acpi_ev_acquire_global_lock(u16 timeout) -{ - acpi_cpu_flags flags; - acpi_status status = AE_OK; - u8 acquired = FALSE; - - ACPI_FUNCTION_TRACE(ev_acquire_global_lock); - - /* - * Only one thread can acquire the GL at a time, the global_lock_mutex - * enforces this. This interface releases the interpreter if we must wait. - */ - status = acpi_ex_system_wait_mutex( - acpi_gbl_global_lock_mutex->mutex.os_mutex, 0); - if (status == AE_TIME) { - if (acpi_ev_global_lock_thread_id == acpi_os_get_thread_id()) { - acpi_ev_global_lock_acquired++; - return AE_OK; - } - } - - if (ACPI_FAILURE(status)) { - status = acpi_ex_system_wait_mutex( - acpi_gbl_global_lock_mutex->mutex.os_mutex, - timeout); - } - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - acpi_ev_global_lock_thread_id = acpi_os_get_thread_id(); - acpi_ev_global_lock_acquired++; - - /* - * Update the global lock handle and check for wraparound. The handle is - * only used for the external global lock interfaces, but it is updated - * here to properly handle the case where a single thread may acquire the - * lock via both the AML and the acpi_acquire_global_lock interfaces. The - * handle is therefore updated on the first acquire from a given thread - * regardless of where the acquisition request originated. - */ - acpi_gbl_global_lock_handle++; - if (acpi_gbl_global_lock_handle == 0) { - acpi_gbl_global_lock_handle = 1; - } - - /* - * Make sure that a global lock actually exists. If not, just treat the - * lock as a standard mutex. 
- */ - if (!acpi_gbl_global_lock_present) { - acpi_gbl_global_lock_acquired = TRUE; - return_ACPI_STATUS(AE_OK); - } - - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); - - do { - - /* Attempt to acquire the actual hardware lock */ - - ACPI_ACQUIRE_GLOBAL_LOCK(acpi_gbl_FACS, acquired); - if (acquired) { - acpi_gbl_global_lock_acquired = TRUE; - - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "Acquired hardware Global Lock\n")); - break; - } - - acpi_ev_global_lock_pending = TRUE; - - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); - - /* - * Did not get the lock. The pending bit was set above, and we - * must wait until we get the global lock released interrupt. - */ - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "Waiting for hardware Global Lock\n")); - - /* - * Wait for handshake with the global lock interrupt handler. - * This interface releases the interpreter if we must wait. - */ - status = acpi_ex_system_wait_semaphore( - acpi_gbl_global_lock_semaphore, - ACPI_WAIT_FOREVER); - - flags = acpi_os_acquire_lock(acpi_ev_global_lock_pending_lock); - - } while (ACPI_SUCCESS(status)); - - acpi_ev_global_lock_pending = FALSE; - - acpi_os_release_lock(acpi_ev_global_lock_pending_lock, flags); - - return_ACPI_STATUS(status); -} - -/******************************************************************************* - * - * FUNCTION: acpi_ev_release_global_lock - * - * PARAMETERS: None - * - * RETURN: Status - * - * DESCRIPTION: Releases ownership of the Global Lock. 
- * - ******************************************************************************/ - -acpi_status acpi_ev_release_global_lock(void) -{ - u8 pending = FALSE; - acpi_status status = AE_OK; - - ACPI_FUNCTION_TRACE(ev_release_global_lock); - - /* Lock must be already acquired */ - - if (!acpi_gbl_global_lock_acquired) { - ACPI_WARNING((AE_INFO, - "Cannot release the ACPI Global Lock, it has not been acquired")); - return_ACPI_STATUS(AE_NOT_ACQUIRED); - } - - acpi_ev_global_lock_acquired--; - if (acpi_ev_global_lock_acquired > 0) { - return AE_OK; - } - - if (acpi_gbl_global_lock_present) { - - /* Allow any thread to release the lock */ - - ACPI_RELEASE_GLOBAL_LOCK(acpi_gbl_FACS, pending); - - /* - * If the pending bit was set, we must write GBL_RLS to the control - * register - */ - if (pending) { - status = - acpi_write_bit_register - (ACPI_BITREG_GLOBAL_LOCK_RELEASE, - ACPI_ENABLE_EVENT); - } - - ACPI_DEBUG_PRINT((ACPI_DB_EXEC, - "Released hardware Global Lock\n")); - } - - acpi_gbl_global_lock_acquired = FALSE; - - /* Release the local GL mutex */ - acpi_ev_global_lock_thread_id = 0; - acpi_ev_global_lock_acquired = 0; - acpi_os_release_mutex(acpi_gbl_global_lock_mutex->mutex.os_mutex); - return_ACPI_STATUS(status); -} - /****************************************************************************** * * FUNCTION: acpi_ev_terminate diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index bea7223d7a7..f0edf5c43c0 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -55,6 +55,8 @@ static u8 acpi_ev_has_default_handler(struct acpi_namespace_node *node, acpi_adr_space_type space_id); +static void acpi_ev_orphan_ec_reg_method(void); + static acpi_status acpi_ev_reg_run(acpi_handle obj_handle, u32 level, void *context, void **return_value); @@ -561,7 +563,9 @@ acpi_ev_detach_region(union acpi_operand_object *region_obj, /* Now stop region accesses by executing the _REG method */ - status = 
acpi_ev_execute_reg_method(region_obj, 0); + status = + acpi_ev_execute_reg_method(region_obj, + ACPI_REG_DISCONNECT); if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "from region _REG, [%s]", @@ -1062,6 +1066,12 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, NULL, &space_id, NULL); + /* Special case for EC: handle "orphan" _REG methods with no region */ + + if (space_id == ACPI_ADR_SPACE_EC) { + acpi_ev_orphan_ec_reg_method(); + } + return_ACPI_STATUS(status); } @@ -1120,6 +1130,113 @@ acpi_ev_reg_run(acpi_handle obj_handle, return (AE_OK); } - status = acpi_ev_execute_reg_method(obj_desc, 1); + status = acpi_ev_execute_reg_method(obj_desc, ACPI_REG_CONNECT); return (status); } + +/******************************************************************************* + * + * FUNCTION: acpi_ev_orphan_ec_reg_method + * + * PARAMETERS: None + * + * RETURN: None + * + * DESCRIPTION: Execute an "orphan" _REG method that appears under the EC + * device. This is a _REG method that has no corresponding region + * within the EC device scope. The orphan _REG method appears to + * have been enabled by the description of the ECDT in the ACPI + * specification: "The availability of the region space can be + * detected by providing a _REG method object underneath the + * Embedded Controller device." + * + * To quickly access the EC device, we use the EC_ID that appears + * within the ECDT. Otherwise, we would need to perform a time- + * consuming namespace walk, executing _HID methods to find the + * EC device. 
+ * + ******************************************************************************/ + +static void acpi_ev_orphan_ec_reg_method(void) +{ + struct acpi_table_ecdt *table; + acpi_status status; + struct acpi_object_list args; + union acpi_object objects[2]; + struct acpi_namespace_node *ec_device_node; + struct acpi_namespace_node *reg_method; + struct acpi_namespace_node *next_node; + + ACPI_FUNCTION_TRACE(ev_orphan_ec_reg_method); + + /* Get the ECDT (if present in system) */ + + status = acpi_get_table(ACPI_SIG_ECDT, 0, + ACPI_CAST_INDIRECT_PTR(struct acpi_table_header, + &table)); + if (ACPI_FAILURE(status)) { + return_VOID; + } + + /* We need a valid EC_ID string */ + + if (!(*table->id)) { + return_VOID; + } + + /* Namespace is currently locked, must release */ + + (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); + + /* Get a handle to the EC device referenced in the ECDT */ + + status = acpi_get_handle(NULL, + ACPI_CAST_PTR(char, table->id), + ACPI_CAST_PTR(acpi_handle, &ec_device_node)); + if (ACPI_FAILURE(status)) { + goto exit; + } + + /* Get a handle to a _REG method immediately under the EC device */ + + status = acpi_get_handle(ec_device_node, + METHOD_NAME__REG, ACPI_CAST_PTR(acpi_handle, + &reg_method)); + if (ACPI_FAILURE(status)) { + goto exit; + } + + /* + * Execute the _REG method only if there is no Operation Region in + * this scope with the Embedded Controller space ID. Otherwise, it + * will already have been executed. Note, this allows for Regions + * with other space IDs to be present; but the code below will then + * execute the _REG method with the EC space ID argument. 
+ */ + next_node = acpi_ns_get_next_node(ec_device_node, NULL); + while (next_node) { + if ((next_node->type == ACPI_TYPE_REGION) && + (next_node->object) && + (next_node->object->region.space_id == ACPI_ADR_SPACE_EC)) { + goto exit; /* Do not execute _REG */ + } + next_node = acpi_ns_get_next_node(ec_device_node, next_node); + } + + /* Evaluate the _REG(EC,Connect) method */ + + args.count = 2; + args.pointer = objects; + objects[0].type = ACPI_TYPE_INTEGER; + objects[0].integer.value = ACPI_ADR_SPACE_EC; + objects[1].type = ACPI_TYPE_INTEGER; + objects[1].integer.value = ACPI_REG_CONNECT; + + status = acpi_evaluate_object(reg_method, NULL, &args, NULL); + + exit: + /* We ignore all errors from above, don't care */ + + status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); + return_VOID; +} diff --git a/drivers/acpi/acpica/evrgnini.c b/drivers/acpi/acpica/evrgnini.c index 9659cee6093..55a5d35ef34 100644 --- a/drivers/acpi/acpica/evrgnini.c +++ b/drivers/acpi/acpica/evrgnini.c @@ -637,7 +637,7 @@ acpi_ev_initialize_region(union acpi_operand_object *region_obj, status = acpi_ev_execute_reg_method - (region_obj, 1); + (region_obj, ACPI_REG_CONNECT); if (acpi_ns_locked) { status = diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index c85c8c45599..00cd95692a9 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -130,20 +130,21 @@ acpi_install_address_space_handler(acpi_handle device, case ACPI_ADR_SPACE_PCI_CONFIG: case ACPI_ADR_SPACE_DATA_TABLE: - if (acpi_gbl_reg_methods_executed) { + if (!acpi_gbl_reg_methods_executed) { - /* Run all _REG methods for this address space */ - - status = acpi_ev_execute_reg_methods(node, space_id); + /* We will defer execution of the _REG methods for this space */ + goto unlock_and_exit; } break; default: - - status = acpi_ev_execute_reg_methods(node, space_id); break; } + /* Run all _REG methods for this address space */ + + status = acpi_ev_execute_reg_methods(node, space_id); 
+ unlock_and_exit: (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); return_ACPI_STATUS(status); diff --git a/drivers/acpi/acpica/excreate.c b/drivers/acpi/acpica/excreate.c index e7b372d1766..110711afada 100644 --- a/drivers/acpi/acpica/excreate.c +++ b/drivers/acpi/acpica/excreate.c @@ -305,7 +305,8 @@ acpi_ex_create_region(u8 * aml_start, * range */ if ((region_space >= ACPI_NUM_PREDEFINED_REGIONS) && - (region_space < ACPI_USER_REGION_BEGIN)) { + (region_space < ACPI_USER_REGION_BEGIN) && + (region_space != ACPI_ADR_SPACE_DATA_TABLE)) { ACPI_ERROR((AE_INFO, "Invalid AddressSpace type 0x%X", region_space)); return_ACPI_STATUS(AE_AML_INVALID_SPACE_ID); diff --git a/drivers/acpi/acpica/nsrepair.c b/drivers/acpi/acpica/nsrepair.c index 1d76ac85b5e..ac7b854b0bd 100644 --- a/drivers/acpi/acpica/nsrepair.c +++ b/drivers/acpi/acpica/nsrepair.c @@ -74,7 +74,6 @@ ACPI_MODULE_NAME("nsrepair") * * Additional possible repairs: * - * Optional/unnecessary NULL package elements removed * Required package elements that are NULL replaced by Integer/String/Buffer * Incorrect standalone package wrapped with required outer package * @@ -623,16 +622,12 @@ acpi_ns_remove_null_elements(struct acpi_predefined_data *data, ACPI_FUNCTION_NAME(ns_remove_null_elements); /* - * PTYPE1 packages contain no subpackages. - * PTYPE2 packages contain a variable number of sub-packages. We can - * safely remove all NULL elements from the PTYPE2 packages. + * We can safely remove all NULL elements from these package types: + * PTYPE1_VAR packages contain a variable number of simple data types. + * PTYPE2 packages contain a variable number of sub-packages. 
*/ switch (package_type) { - case ACPI_PTYPE1_FIXED: case ACPI_PTYPE1_VAR: - case ACPI_PTYPE1_OPTION: - return; - case ACPI_PTYPE2: case ACPI_PTYPE2_COUNT: case ACPI_PTYPE2_PKG_COUNT: @@ -642,6 +637,8 @@ acpi_ns_remove_null_elements(struct acpi_predefined_data *data, break; default: + case ACPI_PTYPE1_FIXED: + case ACPI_PTYPE1_OPTION: return; } diff --git a/drivers/acpi/acpica/utdecode.c b/drivers/acpi/acpica/utdecode.c index 136a814cec6..97cb36f85ce 100644 --- a/drivers/acpi/acpica/utdecode.c +++ b/drivers/acpi/acpica/utdecode.c @@ -170,8 +170,7 @@ const char *acpi_gbl_region_types[ACPI_NUM_PREDEFINED_REGIONS] = { "SMBus", "SystemCMOS", "PCIBARTarget", - "IPMI", - "DataTable" + "IPMI" }; char *acpi_ut_get_region_name(u8 space_id) @@ -179,6 +178,8 @@ char *acpi_ut_get_region_name(u8 space_id) if (space_id >= ACPI_USER_REGION_BEGIN) { return ("UserDefinedRegion"); + } else if (space_id == ACPI_ADR_SPACE_DATA_TABLE) { + return ("DataTable"); } else if (space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { return ("FunctionalFixedHW"); } else if (space_id >= ACPI_NUM_PREDEFINED_REGIONS) { diff --git a/drivers/acpi/acpica/utmutex.c b/drivers/acpi/acpica/utmutex.c index a946c689f03..7d797e2baec 100644 --- a/drivers/acpi/acpica/utmutex.c +++ b/drivers/acpi/acpica/utmutex.c @@ -83,9 +83,15 @@ acpi_status acpi_ut_mutex_initialize(void) /* Create the spinlocks for use at interrupt level */ - spin_lock_init(acpi_gbl_gpe_lock); - spin_lock_init(acpi_gbl_hardware_lock); - spin_lock_init(acpi_ev_global_lock_pending_lock); + status = acpi_os_create_lock (&acpi_gbl_gpe_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } + + status = acpi_os_create_lock (&acpi_gbl_hardware_lock); + if (ACPI_FAILURE (status)) { + return_ACPI_STATUS (status); + } /* Mutex for _OSI support */ status = acpi_os_create_mutex(&acpi_gbl_osi_mutex); diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 9749980ca6c..d1e06c182cd 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ 
-227,7 +227,7 @@ static int __acpi_bus_set_power(struct acpi_device *device, int state) acpi_status status = AE_OK; char object_name[5] = { '_', 'P', 'S', '0' + state, '\0' }; - if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3)) + if (!device || (state < ACPI_STATE_D0) || (state > ACPI_STATE_D3_COLD)) return -EINVAL; /* Make sure this is a valid target state */ diff --git a/drivers/acpi/custom_method.c b/drivers/acpi/custom_method.c new file mode 100644 index 00000000000..5d42c2414ae --- /dev/null +++ b/drivers/acpi/custom_method.c @@ -0,0 +1,100 @@ +/* + * debugfs.c - ACPI debugfs interface to userspace. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/uaccess.h> +#include <linux/debugfs.h> +#include <acpi/acpi_drivers.h> + +#include "internal.h" + +#define _COMPONENT ACPI_SYSTEM_COMPONENT +ACPI_MODULE_NAME("custom_method"); +MODULE_LICENSE("GPL"); + +static struct dentry *cm_dentry; + +/* /sys/kernel/debug/acpi/custom_method */ + +static ssize_t cm_write(struct file *file, const char __user * user_buf, + size_t count, loff_t *ppos) +{ + static char *buf; + static u32 max_size; + static u32 uncopied_bytes; + + struct acpi_table_header table; + acpi_status status; + + if (!(*ppos)) { + /* parse the table header to get the table length */ + if (count <= sizeof(struct acpi_table_header)) + return -EINVAL; + if (copy_from_user(&table, user_buf, + sizeof(struct acpi_table_header))) + return -EFAULT; + uncopied_bytes = max_size = table.length; + buf = kzalloc(max_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } + + if (buf == NULL) + return -EINVAL; + + if ((*ppos > max_size) || + (*ppos + count > max_size) || + (*ppos + count < count) || + (count > uncopied_bytes)) + return -EINVAL; + + if (copy_from_user(buf + (*ppos), user_buf, count)) { + kfree(buf); + buf = NULL; + return -EFAULT; + } + + uncopied_bytes -= count; + *ppos += count; + + if (!uncopied_bytes) { + status = 
acpi_install_method(buf); + kfree(buf); + buf = NULL; + if (ACPI_FAILURE(status)) + return -EINVAL; + add_taint(TAINT_OVERRIDDEN_ACPI_TABLE); + } + + return count; +} + +static const struct file_operations cm_fops = { + .write = cm_write, + .llseek = default_llseek, +}; + +static int __init acpi_custom_method_init(void) +{ + if (acpi_debugfs_dir == NULL) + return -ENOENT; + + cm_dentry = debugfs_create_file("custom_method", S_IWUSR, + acpi_debugfs_dir, NULL, &cm_fops); + if (cm_dentry == NULL) + return -ENODEV; + + return 0; +} + +static void __exit acpi_custom_method_exit(void) +{ + if (cm_dentry) + debugfs_remove(cm_dentry); + } + +module_init(acpi_custom_method_init); +module_exit(acpi_custom_method_exit); diff --git a/drivers/acpi/debugfs.c b/drivers/acpi/debugfs.c index 384f7abcff7..182a9fc3635 100644 --- a/drivers/acpi/debugfs.c +++ b/drivers/acpi/debugfs.c @@ -3,100 +3,16 @@ */ #include <linux/init.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/uaccess.h> #include <linux/debugfs.h> #include <acpi/acpi_drivers.h> #define _COMPONENT ACPI_SYSTEM_COMPONENT ACPI_MODULE_NAME("debugfs"); +struct dentry *acpi_debugfs_dir; +EXPORT_SYMBOL_GPL(acpi_debugfs_dir); -/* /sys/modules/acpi/parameters/aml_debug_output */ - -module_param_named(aml_debug_output, acpi_gbl_enable_aml_debug_object, - bool, 0644); -MODULE_PARM_DESC(aml_debug_output, - "To enable/disable the ACPI Debug Object output."); - -/* /sys/kernel/debug/acpi/custom_method */ - -static ssize_t cm_write(struct file *file, const char __user * user_buf, - size_t count, loff_t *ppos) +void __init acpi_debugfs_init(void) { - static char *buf; - static u32 max_size; - static u32 uncopied_bytes; - - struct acpi_table_header table; - acpi_status status; - - if (!(*ppos)) { - /* parse the table header to get the table length */ - if (count <= sizeof(struct acpi_table_header)) - return -EINVAL; - if (copy_from_user(&table, user_buf, - sizeof(struct acpi_table_header))) - return -EFAULT; - 
uncopied_bytes = max_size = table.length; - buf = kzalloc(max_size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - } - - if (buf == NULL) - return -EINVAL; - - if ((*ppos > max_size) || - (*ppos + count > max_size) || - (*ppos + count < count) || - (count > uncopied_bytes)) - return -EINVAL; - - if (copy_from_user(buf + (*ppos), user_buf, count)) { - kfree(buf); - buf = NULL; - return -EFAULT; - } - - uncopied_bytes -= count; - *ppos += count; - - if (!uncopied_bytes) { - status = acpi_install_method(buf); - kfree(buf); - buf = NULL; - if (ACPI_FAILURE(status)) - return -EINVAL; - add_taint(TAINT_OVERRIDDEN_ACPI_TABLE); - } - - return count; -} - -static const struct file_operations cm_fops = { - .write = cm_write, - .llseek = default_llseek, -}; - -int __init acpi_debugfs_init(void) -{ - struct dentry *acpi_dir, *cm_dentry; - - acpi_dir = debugfs_create_dir("acpi", NULL); - if (!acpi_dir) - goto err; - - cm_dentry = debugfs_create_file("custom_method", S_IWUSR, - acpi_dir, NULL, &cm_fops); - if (!cm_dentry) - goto err; - - return 0; - -err: - if (acpi_dir) - debugfs_remove(acpi_dir); - return -EINVAL; + acpi_debugfs_dir = debugfs_create_dir("acpi", NULL); } diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index fa848c4116a..b19a18dd994 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -69,7 +69,6 @@ enum ec_command { #define ACPI_EC_DELAY 500 /* Wait 500ms max. during EC ops */ #define ACPI_EC_UDELAY_GLK 1000 /* Wait 1ms max. 
to get global lock */ -#define ACPI_EC_CDELAY 10 /* Wait 10us before polling EC */ #define ACPI_EC_MSI_UDELAY 550 /* Wait 550us for MSI EC */ #define ACPI_EC_STORM_THRESHOLD 8 /* number of false interrupts @@ -433,8 +432,7 @@ EXPORT_SYMBOL(ec_write); int ec_transaction(u8 command, const u8 * wdata, unsigned wdata_len, - u8 * rdata, unsigned rdata_len, - int force_poll) + u8 * rdata, unsigned rdata_len) { struct transaction t = {.command = command, .wdata = wdata, .rdata = rdata, @@ -592,8 +590,6 @@ static void acpi_ec_gpe_query(void *ec_cxt) mutex_unlock(&ec->lock); } -static void acpi_ec_gpe_query(void *ec_cxt); - static int ec_check_sci(struct acpi_ec *ec, u8 state) { if (state & ACPI_EC_FLAG_SCI) { @@ -808,8 +804,6 @@ static int acpi_ec_add(struct acpi_device *device) return -EINVAL; } - ec->handle = device->handle; - /* Find and register all query methods */ acpi_walk_namespace(ACPI_TYPE_METHOD, ec->handle, 1, acpi_ec_register_query_methods, NULL, ec, NULL); @@ -938,8 +932,19 @@ static struct dmi_system_id __initdata ec_dmi_table[] = { ec_flag_msi, "MSI hardware", { DMI_MATCH(DMI_CHASSIS_VENDOR, "MICRO-STAR")}, NULL}, { + ec_flag_msi, "Quanta hardware", { + DMI_MATCH(DMI_SYS_VENDOR, "Quanta"), + DMI_MATCH(DMI_PRODUCT_NAME, "TW8/SW8/DW8"),}, NULL}, + { + ec_flag_msi, "Quanta hardware", { + DMI_MATCH(DMI_SYS_VENDOR, "Quanta"), + DMI_MATCH(DMI_PRODUCT_NAME, "TW9/SW9"),}, NULL}, + { ec_validate_ecdt, "ASUS hardware", { DMI_MATCH(DMI_BIOS_VENDOR, "ASUS") }, NULL}, + { + ec_validate_ecdt, "ASUS hardware", { + DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer Inc.") }, NULL}, {}, }; diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 4bfb759deb1..ca75b9ce048 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -28,9 +28,10 @@ int acpi_scan_init(void); int acpi_sysfs_init(void); #ifdef CONFIG_DEBUG_FS +extern struct dentry *acpi_debugfs_dir; int acpi_debugfs_init(void); #else -static inline int acpi_debugfs_init(void) { return 0; } +static 
inline void acpi_debugfs_init(void) { return; } #endif /* -------------------------------------------------------------------------- diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 45ad4ffef53..52ca9649d76 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -902,14 +902,6 @@ void acpi_os_wait_events_complete(void *context) EXPORT_SYMBOL(acpi_os_wait_events_complete); -/* - * Deallocate the memory for a spinlock. - */ -void acpi_os_delete_lock(acpi_spinlock handle) -{ - return; -} - acpi_status acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) { @@ -1341,6 +1333,31 @@ int acpi_resources_are_enforced(void) EXPORT_SYMBOL(acpi_resources_are_enforced); /* + * Create and initialize a spinlock. + */ +acpi_status +acpi_os_create_lock(acpi_spinlock *out_handle) +{ + spinlock_t *lock; + + lock = ACPI_ALLOCATE(sizeof(spinlock_t)); + if (!lock) + return AE_NO_MEMORY; + spin_lock_init(lock); + *out_handle = lock; + + return AE_OK; +} + +/* + * Deallocate the memory for a spinlock. + */ +void acpi_os_delete_lock(acpi_spinlock handle) +{ + ACPI_FREE(handle); +} + +/* * Acquire a spinlock. * * handle is a pointer to the spinlock_t. 
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 25bf17da69f..02d2a4c9084 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -37,7 +37,6 @@ static struct dmi_system_id __initdata processor_idle_dmi_table[] = { {}, }; -#ifdef CONFIG_SMP static int map_lapic_id(struct acpi_subtable_header *entry, u32 acpi_id, int *apic_id) { @@ -165,7 +164,9 @@ exit: int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) { +#ifdef CONFIG_SMP int i; +#endif int apic_id = -1; apic_id = map_mat_entry(handle, type, acpi_id); @@ -174,14 +175,19 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) if (apic_id == -1) return apic_id; +#ifdef CONFIG_SMP for_each_possible_cpu(i) { if (cpu_physical_id(i) == apic_id) return i; } +#else + /* In UP kernel, only processor 0 is valid */ + if (apic_id == 0) + return apic_id; +#endif return -1; } EXPORT_SYMBOL_GPL(acpi_get_cpuid); -#endif static bool __init processor_physically_present(acpi_handle handle) { @@ -217,7 +223,7 @@ static bool __init processor_physically_present(acpi_handle handle) type = (acpi_type == ACPI_TYPE_DEVICE) ? 
1 : 0; cpuid = acpi_get_cpuid(handle, type, acpi_id); - if ((cpuid == -1) && (num_possible_cpus() > 1)) + if (cpuid == -1) return false; return true; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index d615b7d69bc..431ab11c8c1 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -161,7 +161,7 @@ static void lapic_timer_check_state(int state, struct acpi_processor *pr, if (cpu_has(&cpu_data(pr->id), X86_FEATURE_ARAT)) return; - if (c1e_detected) + if (amd_e400_c1e_detected) type = ACPI_STATE_C1; /* diff --git a/drivers/acpi/sysfs.c b/drivers/acpi/sysfs.c index 61891e75583..77255f250db 100644 --- a/drivers/acpi/sysfs.c +++ b/drivers/acpi/sysfs.c @@ -220,6 +220,14 @@ module_param_call(trace_state, param_set_trace_state, param_get_trace_state, NULL, 0644); #endif /* CONFIG_ACPI_DEBUG */ + +/* /sys/modules/acpi/parameters/aml_debug_output */ + +module_param_named(aml_debug_output, acpi_gbl_enable_aml_debug_object, + bool, 0644); +MODULE_PARM_DESC(aml_debug_output, + "To enable/disable the ACPI Debug Object output."); + /* /sys/module/acpi/parameters/acpica_version */ static int param_get_acpica_version(char *buffer, struct kernel_param *kp) { diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index db8f88586c8..98de8f41867 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1038,6 +1038,7 @@ static void floppy_disable_hlt(void) { unsigned long flags; + WARN_ONCE(1, "floppy_disable_hlt() scheduled for removal in 2012"); spin_lock_irqsave(&floppy_hlt_lock, flags); if (!hlt_disabled) { hlt_disabled = 1; diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index a0aabd904a5..46b8136c31b 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -321,7 +321,6 @@ static void pcd_init_units(void) strcpy(disk->disk_name, cd->name); /* umm... 
*/ disk->fops = &pcd_bdops; disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - disk->events = DISK_EVENT_MEDIA_CHANGE; } } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6ecf89cdf00..079c08808d8 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -6,10 +6,13 @@ #include <linux/virtio.h> #include <linux/virtio_blk.h> #include <linux/scatterlist.h> +#include <linux/string_helpers.h> +#include <scsi/scsi_cmnd.h> #define PART_BITS 4 static int major, index; +struct workqueue_struct *virtblk_wq; struct virtio_blk { @@ -26,6 +29,9 @@ struct virtio_blk mempool_t *pool; + /* Process context for config space updates */ + struct work_struct config_work; + /* What host tells us, plus 2 for header & tailer. */ unsigned int sg_elems; @@ -141,7 +147,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, num = blk_rq_map_sg(q, vbr->req, vblk->sg + out); if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) { - sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96); + sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE); sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr, sizeof(vbr->in_hdr)); } @@ -291,6 +297,46 @@ static ssize_t virtblk_serial_show(struct device *dev, } DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL); +static void virtblk_config_changed_work(struct work_struct *work) +{ + struct virtio_blk *vblk = + container_of(work, struct virtio_blk, config_work); + struct virtio_device *vdev = vblk->vdev; + struct request_queue *q = vblk->disk->queue; + char cap_str_2[10], cap_str_10[10]; + u64 capacity, size; + + /* Host must always specify the capacity. */ + vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), + &capacity, sizeof(capacity)); + + /* If capacity is too big, truncate with warning. 
*/ + if ((sector_t)capacity != capacity) { + dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", + (unsigned long long)capacity); + capacity = (sector_t)-1; + } + + size = capacity * queue_logical_block_size(q); + string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); + string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); + + dev_notice(&vdev->dev, + "new size: %llu %d-byte logical blocks (%s/%s)\n", + (unsigned long long)capacity, + queue_logical_block_size(q), + cap_str_10, cap_str_2); + + set_capacity(vblk->disk, capacity); +} + +static void virtblk_config_changed(struct virtio_device *vdev) +{ + struct virtio_blk *vblk = vdev->priv; + + queue_work(virtblk_wq, &vblk->config_work); +} + static int __devinit virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -327,6 +373,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) vblk->vdev = vdev; vblk->sg_elems = sg_elems; sg_init_table(vblk->sg, vblk->sg_elems); + INIT_WORK(&vblk->config_work, virtblk_config_changed_work); /* We expect one virtqueue, for output. */ vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); @@ -477,6 +524,8 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + flush_work(&vblk->config_work); + /* Nothing should be pending. */ BUG_ON(!list_empty(&vblk->reqs)); @@ -508,27 +557,47 @@ static unsigned int features[] = { * Use __refdata to avoid this warning. 
*/ static struct virtio_driver __refdata virtio_blk = { - .feature_table = features, - .feature_table_size = ARRAY_SIZE(features), - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = virtblk_probe, - .remove = __devexit_p(virtblk_remove), + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtblk_probe, + .remove = __devexit_p(virtblk_remove), + .config_changed = virtblk_config_changed, }; static int __init init(void) { + int error; + + virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); + if (!virtblk_wq) + return -ENOMEM; + major = register_blkdev(0, "virtblk"); - if (major < 0) - return major; - return register_virtio_driver(&virtio_blk); + if (major < 0) { + error = major; + goto out_destroy_workqueue; + } + + error = register_virtio_driver(&virtio_blk); + if (error) + goto out_unregister_blkdev; + return 0; + +out_unregister_blkdev: + unregister_blkdev(major, "virtblk"); +out_destroy_workqueue: + destroy_workqueue(virtblk_wq); + return error; } static void __exit fini(void) { unregister_blkdev(major, "virtblk"); unregister_virtio_driver(&virtio_blk); + destroy_workqueue(virtblk_wq); } module_init(init); module_exit(fini); diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index b3f01996318..48ad2a7ab08 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -355,29 +355,24 @@ static void hci_uart_tty_wakeup(struct tty_struct *tty) * flags pointer to flags for data * count count of received data in bytes * - * Return Value: Number of bytes received + * Return Value: None */ -static unsigned int hci_uart_tty_receive(struct tty_struct *tty, - const u8 *data, char *flags, int count) +static void hci_uart_tty_receive(struct tty_struct *tty, const u8 *data, char *flags, int count) { struct hci_uart *hu = (void *)tty->disc_data; - int received; if (!hu 
|| tty != hu->tty) - return -ENODEV; + return; if (!test_bit(HCI_UART_PROTO_SET, &hu->flags)) - return -EINVAL; + return; spin_lock(&hu->rx_lock); - received = hu->proto->recv(hu, (void *) data, count); - if (received > 0) - hu->hdev->stat.byte_rx += received; + hu->proto->recv(hu, (void *) data, count); + hu->hdev->stat.byte_rx += count; spin_unlock(&hu->rx_lock); tty_unthrottle(tty); - - return received; } static int hci_uart_register_dev(struct hci_uart *hu) diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index ae15a4ddaa9..7878da89d29 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -627,7 +627,6 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id) gendisk->fops = &viocd_fops; gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - gendisk->events = DISK_EVENT_MEDIA_CHANGE; set_capacity(gendisk, 0); gendisk->private_data = d; d->viocd_disk = gendisk; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 838568a7dbf..fb68b129537 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1677,17 +1677,12 @@ static int __devinit virtcons_probe(struct virtio_device *vdev) portdev->config.max_nr_ports = 1; if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) { multiport = true; - vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT; - vdev->config->get(vdev, offsetof(struct virtio_console_config, max_nr_ports), &portdev->config.max_nr_ports, sizeof(portdev->config.max_nr_ports)); } - /* Let the Host know we support multiple ports.*/ - vdev->config->finalize_features(vdev); - err = init_vqs(portdev); if (err < 0) { dev_err(&vdev->dev, "Error %d initializing vqs\n", err); diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index 036e5865eb4..dc7c033ef58 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -24,7 +24,6 @@ #include <linux/ioport.h> #include <linux/io.h> #include 
<linux/clk.h> -#include <linux/pm_runtime.h> #include <linux/irq.h> #include <linux/err.h> #include <linux/clocksource.h> @@ -153,12 +152,10 @@ static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate) { int ret; - /* wake up device and enable clock */ - pm_runtime_get_sync(&p->pdev->dev); + /* enable clock */ ret = clk_enable(p->clk); if (ret) { dev_err(&p->pdev->dev, "cannot enable clock\n"); - pm_runtime_put_sync(&p->pdev->dev); return ret; } @@ -190,9 +187,8 @@ static void sh_cmt_disable(struct sh_cmt_priv *p) /* disable interrupts in CMT block */ sh_cmt_write(p, CMCSR, 0); - /* stop clock and mark device as idle */ + /* stop clock */ clk_disable(p->clk); - pm_runtime_put_sync(&p->pdev->dev); } /* private flags */ @@ -664,7 +660,6 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev) if (p) { dev_info(&pdev->dev, "kept as earlytimer\n"); - pm_runtime_enable(&pdev->dev); return 0; } @@ -679,9 +674,6 @@ static int __devinit sh_cmt_probe(struct platform_device *pdev) kfree(p); platform_set_drvdata(pdev, NULL); } - - if (!is_early_platform_device(pdev)) - pm_runtime_enable(&pdev->dev); return ret; } diff --git a/drivers/clocksource/sh_tmu.c b/drivers/clocksource/sh_tmu.c index 17296288a20..80813576861 100644 --- a/drivers/clocksource/sh_tmu.c +++ b/drivers/clocksource/sh_tmu.c @@ -25,7 +25,6 @@ #include <linux/delay.h> #include <linux/io.h> #include <linux/clk.h> -#include <linux/pm_runtime.h> #include <linux/irq.h> #include <linux/err.h> #include <linux/clocksource.h> @@ -110,12 +109,10 @@ static int sh_tmu_enable(struct sh_tmu_priv *p) { int ret; - /* wake up device and enable clock */ - pm_runtime_get_sync(&p->pdev->dev); + /* enable clock */ ret = clk_enable(p->clk); if (ret) { dev_err(&p->pdev->dev, "cannot enable clock\n"); - pm_runtime_put_sync(&p->pdev->dev); return ret; } @@ -144,9 +141,8 @@ static void sh_tmu_disable(struct sh_tmu_priv *p) /* disable interrupts in TMU block */ sh_tmu_write(p, TCR, 0x0000); - /* stop clock and mark 
device as idle */ + /* stop clock */ clk_disable(p->clk); - pm_runtime_put_sync(&p->pdev->dev); } static void sh_tmu_set_next(struct sh_tmu_priv *p, unsigned long delta, @@ -415,7 +411,6 @@ static int __devinit sh_tmu_probe(struct platform_device *pdev) if (p) { dev_info(&pdev->dev, "kept as earlytimer\n"); - pm_runtime_enable(&pdev->dev); return 0; } @@ -430,9 +425,6 @@ static int __devinit sh_tmu_probe(struct platform_device *pdev) kfree(p); platform_set_drvdata(pdev, NULL); } - - if (!is_early_platform_device(pdev)) - pm_runtime_enable(&pdev->dev); return ret; } diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index f508690eb95..c47f3d09c1e 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -237,6 +237,7 @@ static int menu_select(struct cpuidle_device *dev) unsigned int power_usage = -1; int i; int multiplier; + struct timespec t; if (data->needs_update) { menu_update(dev); @@ -251,8 +252,9 @@ static int menu_select(struct cpuidle_device *dev) return 0; /* determine the expected residency time, round up */ + t = ktime_to_timespec(tick_nohz_get_sleep_length()); data->expected_us = - DIV_ROUND_UP((u32)ktime_to_ns(tick_nohz_get_sleep_length()), 1000); + t.tv_sec * USEC_PER_SEC + t.tv_nsec / NSEC_PER_USEC; data->bucket = which_bucket(data->expected_us); diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index a572600e44e..25cf327cd1c 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -200,16 +200,18 @@ config PL330_DMA platform_data for a dma-pl330 device. config PCH_DMA - tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7213 IOH DMA support" + tristate "Intel EG20T PCH / OKI Semi IOH(ML7213/ML7223) DMA support" depends on PCI && X86 select DMA_ENGINE help Enable support for Intel EG20T PCH DMA engine. - This driver also can be used for OKI SEMICONDUCTOR ML7213 IOH(Input/ - Output Hub) which is for IVI(In-Vehicle Infotainment) use. 
- ML7213 is companion chip for Intel Atom E6xx series. - ML7213 is completely compatible for Intel EG20T PCH. + This driver also can be used for OKI SEMICONDUCTOR IOH(Input/ + Output Hub), ML7213 and ML7223. + ML7213 IOH is for IVI(In-Vehicle Infotainment) use and ML7223 IOH is + for MP(Media Phone) use. + ML7213/ML7223 is companion chip for Intel Atom E6xx series. + ML7213/ML7223 is completely compatible for Intel EG20T PCH. config IMX_SDMA tristate "i.MX SDMA support" diff --git a/drivers/dma/TODO b/drivers/dma/TODO new file mode 100644 index 00000000000..a4af8589330 --- /dev/null +++ b/drivers/dma/TODO @@ -0,0 +1,14 @@ +TODO for slave dma + +1. Move remaining drivers to use new slave interface +2. Remove old slave pointer machansim +3. Make issue_pending to start the transaction in below drivers + - mpc512x_dma + - imx-dma + - imx-sdma + - mxs-dma.c + - dw_dmac + - intel_mid_dma + - ste_dma40 +4. Check other subsystems for dma drivers and merge/move to dmaengine +5. Remove dma_slave_config's dma direction. diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 235f53bf494..36144f88d71 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -37,8 +37,8 @@ #define ATC_DEFAULT_CFG (ATC_FIFOCFG_HALFFIFO) #define ATC_DEFAULT_CTRLA (0) -#define ATC_DEFAULT_CTRLB (ATC_SIF(0) \ - |ATC_DIF(1)) +#define ATC_DEFAULT_CTRLB (ATC_SIF(AT_DMA_MEM_IF) \ + |ATC_DIF(AT_DMA_MEM_IF)) /* * Initial number of descriptors to allocate for each channel. 
This could @@ -165,6 +165,29 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc) } /** + * atc_desc_chain - build chain adding a descripor + * @first: address of first descripor of the chain + * @prev: address of previous descripor of the chain + * @desc: descriptor to queue + * + * Called from prep_* functions + */ +static void atc_desc_chain(struct at_desc **first, struct at_desc **prev, + struct at_desc *desc) +{ + if (!(*first)) { + *first = desc; + } else { + /* inform the HW lli about chaining */ + (*prev)->lli.dscr = desc->txd.phys; + /* insert the link descriptor to the LD ring */ + list_add_tail(&desc->desc_node, + &(*first)->tx_list); + } + *prev = desc; +} + +/** * atc_assign_cookie - compute and assign new cookie * @atchan: channel we work on * @desc: descriptor to assign cookie for @@ -237,16 +260,12 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first) static void atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) { - dma_async_tx_callback callback; - void *param; struct dma_async_tx_descriptor *txd = &desc->txd; dev_vdbg(chan2dev(&atchan->chan_common), "descriptor %u complete\n", txd->cookie); atchan->completed_cookie = txd->cookie; - callback = txd->callback; - param = txd->callback_param; /* move children to free_list */ list_splice_init(&desc->tx_list, &atchan->free_list); @@ -278,12 +297,19 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) } } - /* - * The API requires that no submissions are done from a - * callback, so we don't need to drop the lock here - */ - if (callback) - callback(param); + /* for cyclic transfers, + * no need to replay callback function while stopping */ + if (!test_bit(ATC_IS_CYCLIC, &atchan->status)) { + dma_async_tx_callback callback = txd->callback; + void *param = txd->callback_param; + + /* + * The API requires that no submissions are done from a + * callback, so we don't need to drop the lock here + */ + if (callback) + 
callback(param); + } dma_run_dependencies(txd); } @@ -419,6 +445,26 @@ static void atc_handle_error(struct at_dma_chan *atchan) atc_chain_complete(atchan, bad_desc); } +/** + * atc_handle_cyclic - at the end of a period, run callback function + * @atchan: channel used for cyclic operations + * + * Called with atchan->lock held and bh disabled + */ +static void atc_handle_cyclic(struct at_dma_chan *atchan) +{ + struct at_desc *first = atc_first_active(atchan); + struct dma_async_tx_descriptor *txd = &first->txd; + dma_async_tx_callback callback = txd->callback; + void *param = txd->callback_param; + + dev_vdbg(chan2dev(&atchan->chan_common), + "new cyclic period llp 0x%08x\n", + channel_readl(atchan, DSCR)); + + if (callback) + callback(param); +} /*-- IRQ & Tasklet ---------------------------------------------------*/ @@ -426,16 +472,11 @@ static void atc_tasklet(unsigned long data) { struct at_dma_chan *atchan = (struct at_dma_chan *)data; - /* Channel cannot be enabled here */ - if (atc_chan_is_enabled(atchan)) { - dev_err(chan2dev(&atchan->chan_common), - "BUG: channel enabled in tasklet\n"); - return; - } - spin_lock(&atchan->lock); - if (test_and_clear_bit(0, &atchan->error_status)) + if (test_and_clear_bit(ATC_IS_ERROR, &atchan->status)) atc_handle_error(atchan); + else if (test_bit(ATC_IS_CYCLIC, &atchan->status)) + atc_handle_cyclic(atchan); else atc_advance_work(atchan); @@ -464,12 +505,13 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id) for (i = 0; i < atdma->dma_common.chancnt; i++) { atchan = &atdma->chan[i]; - if (pending & (AT_DMA_CBTC(i) | AT_DMA_ERR(i))) { + if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) { if (pending & AT_DMA_ERR(i)) { /* Disable channel on AHB error */ - dma_writel(atdma, CHDR, atchan->mask); + dma_writel(atdma, CHDR, + AT_DMA_RES(i) | atchan->mask); /* Give information to tasklet */ - set_bit(0, &atchan->error_status); + set_bit(ATC_IS_ERROR, &atchan->status); } tasklet_schedule(&atchan->tasklet); ret = IRQ_HANDLED; 
@@ -549,7 +591,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, } ctrla = ATC_DEFAULT_CTRLA; - ctrlb = ATC_DEFAULT_CTRLB + ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN | ATC_SRC_ADDR_MODE_INCR | ATC_DST_ADDR_MODE_INCR | ATC_FC_MEM2MEM; @@ -584,16 +626,7 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, desc->txd.cookie = 0; - if (!first) { - first = desc; - } else { - /* inform the HW lli about chaining */ - prev->lli.dscr = desc->txd.phys; - /* insert the link descriptor to the LD ring */ - list_add_tail(&desc->desc_node, - &first->tx_list); - } - prev = desc; + atc_desc_chain(&first, &prev, desc); } /* First descriptor of the chain embedds additional information */ @@ -639,7 +672,8 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, struct scatterlist *sg; size_t total_len = 0; - dev_vdbg(chan2dev(chan), "prep_slave_sg: %s f0x%lx\n", + dev_vdbg(chan2dev(chan), "prep_slave_sg (%d): %s f0x%lx\n", + sg_len, direction == DMA_TO_DEVICE ? 
"TO DEVICE" : "FROM DEVICE", flags); @@ -651,14 +685,15 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg_width = atslave->reg_width; ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla; - ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN; + ctrlb = ATC_IEN; switch (direction) { case DMA_TO_DEVICE: ctrla |= ATC_DST_WIDTH(reg_width); ctrlb |= ATC_DST_ADDR_MODE_FIXED | ATC_SRC_ADDR_MODE_INCR - | ATC_FC_MEM2PER; + | ATC_FC_MEM2PER + | ATC_SIF(AT_DMA_MEM_IF) | ATC_DIF(AT_DMA_PER_IF); reg = atslave->tx_reg; for_each_sg(sgl, sg, sg_len, i) { struct at_desc *desc; @@ -682,16 +717,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | len >> mem_width; desc->lli.ctrlb = ctrlb; - if (!first) { - first = desc; - } else { - /* inform the HW lli about chaining */ - prev->lli.dscr = desc->txd.phys; - /* insert the link descriptor to the LD ring */ - list_add_tail(&desc->desc_node, - &first->tx_list); - } - prev = desc; + atc_desc_chain(&first, &prev, desc); total_len += len; } break; @@ -699,7 +725,8 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, ctrla |= ATC_SRC_WIDTH(reg_width); ctrlb |= ATC_DST_ADDR_MODE_INCR | ATC_SRC_ADDR_MODE_FIXED - | ATC_FC_PER2MEM; + | ATC_FC_PER2MEM + | ATC_SIF(AT_DMA_PER_IF) | ATC_DIF(AT_DMA_MEM_IF); reg = atslave->rx_reg; for_each_sg(sgl, sg, sg_len, i) { @@ -724,16 +751,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, | len >> reg_width; desc->lli.ctrlb = ctrlb; - if (!first) { - first = desc; - } else { - /* inform the HW lli about chaining */ - prev->lli.dscr = desc->txd.phys; - /* insert the link descriptor to the LD ring */ - list_add_tail(&desc->desc_node, - &first->tx_list); - } - prev = desc; + atc_desc_chain(&first, &prev, desc); total_len += len; } break; @@ -759,41 +777,211 @@ err_desc_get: return NULL; } +/** + * atc_dma_cyclic_check_values + * Check for too big/unaligned periods and unaligned DMA buffer + */ +static int +atc_dma_cyclic_check_values(unsigned int reg_width, 
dma_addr_t buf_addr, + size_t period_len, enum dma_data_direction direction) +{ + if (period_len > (ATC_BTSIZE_MAX << reg_width)) + goto err_out; + if (unlikely(period_len & ((1 << reg_width) - 1))) + goto err_out; + if (unlikely(buf_addr & ((1 << reg_width) - 1))) + goto err_out; + if (unlikely(!(direction & (DMA_TO_DEVICE | DMA_FROM_DEVICE)))) + goto err_out; + + return 0; + +err_out: + return -EINVAL; +} + +/** + * atc_dma_cyclic_fill_desc - Fill one period decriptor + */ +static int +atc_dma_cyclic_fill_desc(struct at_dma_slave *atslave, struct at_desc *desc, + unsigned int period_index, dma_addr_t buf_addr, + size_t period_len, enum dma_data_direction direction) +{ + u32 ctrla; + unsigned int reg_width = atslave->reg_width; + + /* prepare common CRTLA value */ + ctrla = ATC_DEFAULT_CTRLA | atslave->ctrla + | ATC_DST_WIDTH(reg_width) + | ATC_SRC_WIDTH(reg_width) + | period_len >> reg_width; + + switch (direction) { + case DMA_TO_DEVICE: + desc->lli.saddr = buf_addr + (period_len * period_index); + desc->lli.daddr = atslave->tx_reg; + desc->lli.ctrla = ctrla; + desc->lli.ctrlb = ATC_DST_ADDR_MODE_FIXED + | ATC_SRC_ADDR_MODE_INCR + | ATC_FC_MEM2PER + | ATC_SIF(AT_DMA_MEM_IF) + | ATC_DIF(AT_DMA_PER_IF); + break; + + case DMA_FROM_DEVICE: + desc->lli.saddr = atslave->rx_reg; + desc->lli.daddr = buf_addr + (period_len * period_index); + desc->lli.ctrla = ctrla; + desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR + | ATC_SRC_ADDR_MODE_FIXED + | ATC_FC_PER2MEM + | ATC_SIF(AT_DMA_PER_IF) + | ATC_DIF(AT_DMA_MEM_IF); + break; + + default: + return -EINVAL; + } + + return 0; +} + +/** + * atc_prep_dma_cyclic - prepare the cyclic DMA transfer + * @chan: the DMA channel to prepare + * @buf_addr: physical DMA address where the buffer starts + * @buf_len: total number of bytes for the entire buffer + * @period_len: number of bytes for each period + * @direction: transfer direction, to or from device + */ +static struct dma_async_tx_descriptor * +atc_prep_dma_cyclic(struct dma_chan 
*chan, dma_addr_t buf_addr, size_t buf_len, + size_t period_len, enum dma_data_direction direction) +{ + struct at_dma_chan *atchan = to_at_dma_chan(chan); + struct at_dma_slave *atslave = chan->private; + struct at_desc *first = NULL; + struct at_desc *prev = NULL; + unsigned long was_cyclic; + unsigned int periods = buf_len / period_len; + unsigned int i; + + dev_vdbg(chan2dev(chan), "prep_dma_cyclic: %s buf@0x%08x - %d (%d/%d)\n", + direction == DMA_TO_DEVICE ? "TO DEVICE" : "FROM DEVICE", + buf_addr, + periods, buf_len, period_len); + + if (unlikely(!atslave || !buf_len || !period_len)) { + dev_dbg(chan2dev(chan), "prep_dma_cyclic: length is zero!\n"); + return NULL; + } + + was_cyclic = test_and_set_bit(ATC_IS_CYCLIC, &atchan->status); + if (was_cyclic) { + dev_dbg(chan2dev(chan), "prep_dma_cyclic: channel in use!\n"); + return NULL; + } + + /* Check for too big/unaligned periods and unaligned DMA buffer */ + if (atc_dma_cyclic_check_values(atslave->reg_width, buf_addr, + period_len, direction)) + goto err_out; + + /* build cyclic linked list */ + for (i = 0; i < periods; i++) { + struct at_desc *desc; + + desc = atc_desc_get(atchan); + if (!desc) + goto err_desc_get; + + if (atc_dma_cyclic_fill_desc(atslave, desc, i, buf_addr, + period_len, direction)) + goto err_desc_get; + + atc_desc_chain(&first, &prev, desc); + } + + /* lets make a cyclic list */ + prev->lli.dscr = first->txd.phys; + + /* First descriptor of the chain embedds additional information */ + first->txd.cookie = -EBUSY; + first->len = buf_len; + + return &first->txd; + +err_desc_get: + dev_err(chan2dev(chan), "not enough descriptors available\n"); + atc_desc_put(atchan, first); +err_out: + clear_bit(ATC_IS_CYCLIC, &atchan->status); + return NULL; +} + + static int atc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, unsigned long arg) { struct at_dma_chan *atchan = to_at_dma_chan(chan); struct at_dma *atdma = to_at_dma(chan->device); - struct at_desc *desc, *_desc; + int chan_id = 
atchan->chan_common.chan_id; + LIST_HEAD(list); - /* Only supports DMA_TERMINATE_ALL */ - if (cmd != DMA_TERMINATE_ALL) - return -ENXIO; + dev_vdbg(chan2dev(chan), "atc_control (%d)\n", cmd); - /* - * This is only called when something went wrong elsewhere, so - * we don't really care about the data. Just disable the - * channel. We still have to poll the channel enable bit due - * to AHB/HSB limitations. - */ - spin_lock_bh(&atchan->lock); + if (cmd == DMA_PAUSE) { + spin_lock_bh(&atchan->lock); - dma_writel(atdma, CHDR, atchan->mask); + dma_writel(atdma, CHER, AT_DMA_SUSP(chan_id)); + set_bit(ATC_IS_PAUSED, &atchan->status); - /* confirm that this channel is disabled */ - while (dma_readl(atdma, CHSR) & atchan->mask) - cpu_relax(); + spin_unlock_bh(&atchan->lock); + } else if (cmd == DMA_RESUME) { + if (!test_bit(ATC_IS_PAUSED, &atchan->status)) + return 0; - /* active_list entries will end up before queued entries */ - list_splice_init(&atchan->queue, &list); - list_splice_init(&atchan->active_list, &list); + spin_lock_bh(&atchan->lock); - /* Flush all pending and queued descriptors */ - list_for_each_entry_safe(desc, _desc, &list, desc_node) - atc_chain_complete(atchan, desc); + dma_writel(atdma, CHDR, AT_DMA_RES(chan_id)); + clear_bit(ATC_IS_PAUSED, &atchan->status); - spin_unlock_bh(&atchan->lock); + spin_unlock_bh(&atchan->lock); + } else if (cmd == DMA_TERMINATE_ALL) { + struct at_desc *desc, *_desc; + /* + * This is only called when something went wrong elsewhere, so + * we don't really care about the data. Just disable the + * channel. We still have to poll the channel enable bit due + * to AHB/HSB limitations. 
+ */ + spin_lock_bh(&atchan->lock); + + /* disabling channel: must also remove suspend state */ + dma_writel(atdma, CHDR, AT_DMA_RES(chan_id) | atchan->mask); + + /* confirm that this channel is disabled */ + while (dma_readl(atdma, CHSR) & atchan->mask) + cpu_relax(); + + /* active_list entries will end up before queued entries */ + list_splice_init(&atchan->queue, &list); + list_splice_init(&atchan->active_list, &list); + + /* Flush all pending and queued descriptors */ + list_for_each_entry_safe(desc, _desc, &list, desc_node) + atc_chain_complete(atchan, desc); + + clear_bit(ATC_IS_PAUSED, &atchan->status); + /* if channel dedicated to cyclic operations, free it */ + clear_bit(ATC_IS_CYCLIC, &atchan->status); + + spin_unlock_bh(&atchan->lock); + } else { + return -ENXIO; + } return 0; } @@ -835,9 +1023,17 @@ atc_tx_status(struct dma_chan *chan, spin_unlock_bh(&atchan->lock); - dma_set_tx_state(txstate, last_complete, last_used, 0); - dev_vdbg(chan2dev(chan), "tx_status: %d (d%d, u%d)\n", - cookie, last_complete ? last_complete : 0, + if (ret != DMA_SUCCESS) + dma_set_tx_state(txstate, last_complete, last_used, + atc_first_active(atchan)->len); + else + dma_set_tx_state(txstate, last_complete, last_used, 0); + + if (test_bit(ATC_IS_PAUSED, &atchan->status)) + ret = DMA_PAUSED; + + dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d (d%d, u%d)\n", + ret, cookie, last_complete ? last_complete : 0, last_used ? 
last_used : 0); return ret; @@ -853,6 +1049,10 @@ static void atc_issue_pending(struct dma_chan *chan) dev_vdbg(chan2dev(chan), "issue_pending\n"); + /* Not needed for cyclic transfers */ + if (test_bit(ATC_IS_CYCLIC, &atchan->status)) + return; + spin_lock_bh(&atchan->lock); if (!atc_chan_is_enabled(atchan)) { atc_advance_work(atchan); @@ -959,6 +1159,7 @@ static void atc_free_chan_resources(struct dma_chan *chan) } list_splice_init(&atchan->free_list, &list); atchan->descs_allocated = 0; + atchan->status = 0; dev_vdbg(chan2dev(chan), "free_chan_resources: done\n"); } @@ -1092,10 +1293,15 @@ static int __init at_dma_probe(struct platform_device *pdev) if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask)) atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy; - if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) { + if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg; + + if (dma_has_cap(DMA_CYCLIC, atdma->dma_common.cap_mask)) + atdma->dma_common.device_prep_dma_cyclic = atc_prep_dma_cyclic; + + if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask) || + dma_has_cap(DMA_CYCLIC, atdma->dma_common.cap_mask)) atdma->dma_common.device_control = atc_control; - } dma_writel(atdma, EN, AT_DMA_ENABLE); diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h index 495457e3dc4..087dbf1dd39 100644 --- a/drivers/dma/at_hdmac_regs.h +++ b/drivers/dma/at_hdmac_regs.h @@ -103,6 +103,10 @@ /* Bitfields in CTRLB */ #define ATC_SIF(i) (0x3 & (i)) /* Src tx done via AHB-Lite Interface i */ #define ATC_DIF(i) ((0x3 & (i)) << 4) /* Dst tx done via AHB-Lite Interface i */ + /* Specify AHB interfaces */ +#define AT_DMA_MEM_IF 0 /* interface 0 as memory interface */ +#define AT_DMA_PER_IF 1 /* interface 1 as peripheral interface */ + #define ATC_SRC_PIP (0x1 << 8) /* Source Picture-in-Picture enabled */ #define ATC_DST_PIP (0x1 << 12) /* Destination Picture-in-Picture enabled */ #define 
ATC_SRC_DSCR_DIS (0x1 << 16) /* Src Descriptor fetch disable */ @@ -181,12 +185,23 @@ txd_to_at_desc(struct dma_async_tx_descriptor *txd) /*-- Channels --------------------------------------------------------*/ /** + * atc_status - information bits stored in channel status flag + * + * Manipulated with atomic operations. + */ +enum atc_status { + ATC_IS_ERROR = 0, + ATC_IS_PAUSED = 1, + ATC_IS_CYCLIC = 24, +}; + +/** * struct at_dma_chan - internal representation of an Atmel HDMAC channel * @chan_common: common dmaengine channel object members * @device: parent device * @ch_regs: memory mapped register base * @mask: channel index in a mask - * @error_status: transmit error status information from irq handler + * @status: transmit status information from irq/prep* functions * to tasklet (use atomic operations) * @tasklet: bottom half to finish transaction work * @lock: serializes enqueue/dequeue operations to descriptors lists @@ -201,7 +216,7 @@ struct at_dma_chan { struct at_dma *device; void __iomem *ch_regs; u8 mask; - unsigned long error_status; + unsigned long status; struct tasklet_struct tasklet; spinlock_t lock; @@ -309,8 +324,8 @@ static void atc_setup_irq(struct at_dma_chan *atchan, int on) struct at_dma *atdma = to_at_dma(atchan->chan_common.device); u32 ebci; - /* enable interrupts on buffer chain completion & error */ - ebci = AT_DMA_CBTC(atchan->chan_common.chan_id) + /* enable interrupts on buffer transfer completion & error */ + ebci = AT_DMA_BTC(atchan->chan_common.chan_id) | AT_DMA_ERR(atchan->chan_common.chan_id); if (on) dma_writel(atdma, EBCIER, ebci); @@ -347,7 +362,12 @@ static inline int atc_chan_is_enabled(struct at_dma_chan *atchan) */ static void set_desc_eol(struct at_desc *desc) { - desc->lli.ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; + u32 ctrlb = desc->lli.ctrlb; + + ctrlb &= ~ATC_IEN; + ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS; + + desc->lli.ctrlb = ctrlb; desc->lli.dscr = 0; } diff --git a/drivers/dma/coh901318.c 
b/drivers/dma/coh901318.c index f48e5400651..af8c0b5ed70 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1610,7 +1610,7 @@ int __init coh901318_init(void) { return platform_driver_probe(&coh901318_driver, coh901318_probe); } -arch_initcall(coh901318_init); +subsys_initcall(coh901318_init); void __exit coh901318_exit(void) { diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 2a2e2fa00e9..4d180ca9a1d 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -3,6 +3,7 @@ * AVR32 systems.) * * Copyright (C) 2007-2008 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -93,8 +94,9 @@ static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) struct dw_desc *desc, *_desc; struct dw_desc *ret = NULL; unsigned int i = 0; + unsigned long flags; - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) { if (async_tx_test_ack(&desc->txd)) { list_del(&desc->desc_node); @@ -104,7 +106,7 @@ static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) dev_dbg(chan2dev(&dwc->chan), "desc %p not ACKed\n", desc); i++; } - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); dev_vdbg(chan2dev(&dwc->chan), "scanned %u descriptors on freelist\n", i); @@ -130,12 +132,14 @@ static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc) */ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) { + unsigned long flags; + if (desc) { struct dw_desc *child; dwc_sync_desc_for_cpu(dwc, desc); - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); list_for_each_entry(child, &desc->tx_list, desc_node) dev_vdbg(chan2dev(&dwc->chan), "moving child desc %p to freelist\n", @@ -143,7 +147,7 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc 
*desc) list_splice_init(&desc->tx_list, &dwc->free_list); dev_vdbg(chan2dev(&dwc->chan), "moving desc %p to freelist\n", desc); list_add(&desc->desc_node, &dwc->free_list); - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); } } @@ -195,18 +199,23 @@ static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) /*----------------------------------------------------------------------*/ static void -dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) +dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc, + bool callback_required) { - dma_async_tx_callback callback; - void *param; + dma_async_tx_callback callback = NULL; + void *param = NULL; struct dma_async_tx_descriptor *txd = &desc->txd; struct dw_desc *child; + unsigned long flags; dev_vdbg(chan2dev(&dwc->chan), "descriptor %u complete\n", txd->cookie); + spin_lock_irqsave(&dwc->lock, flags); dwc->completed = txd->cookie; - callback = txd->callback; - param = txd->callback_param; + if (callback_required) { + callback = txd->callback; + param = txd->callback_param; + } dwc_sync_desc_for_cpu(dwc, desc); @@ -238,11 +247,9 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) } } - /* - * The API requires that no submissions are done from a - * callback, so we don't need to drop the lock here - */ - if (callback) + spin_unlock_irqrestore(&dwc->lock, flags); + + if (callback_required && callback) callback(param); } @@ -250,7 +257,9 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) { struct dw_desc *desc, *_desc; LIST_HEAD(list); + unsigned long flags; + spin_lock_irqsave(&dwc->lock, flags); if (dma_readl(dw, CH_EN) & dwc->mask) { dev_err(chan2dev(&dwc->chan), "BUG: XFER bit set, but channel not idle!\n"); @@ -271,8 +280,10 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) dwc_dostart(dwc, dwc_first_active(dwc)); } + spin_unlock_irqrestore(&dwc->lock, flags); + 
list_for_each_entry_safe(desc, _desc, &list, desc_node) - dwc_descriptor_complete(dwc, desc); + dwc_descriptor_complete(dwc, desc, true); } static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) @@ -281,7 +292,9 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) struct dw_desc *desc, *_desc; struct dw_desc *child; u32 status_xfer; + unsigned long flags; + spin_lock_irqsave(&dwc->lock, flags); /* * Clear block interrupt flag before scanning so that we don't * miss any, and read LLP before RAW_XFER to ensure it is @@ -294,30 +307,47 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) if (status_xfer & dwc->mask) { /* Everything we've submitted is done */ dma_writel(dw, CLEAR.XFER, dwc->mask); + spin_unlock_irqrestore(&dwc->lock, flags); + dwc_complete_all(dw, dwc); return; } - if (list_empty(&dwc->active_list)) + if (list_empty(&dwc->active_list)) { + spin_unlock_irqrestore(&dwc->lock, flags); return; + } dev_vdbg(chan2dev(&dwc->chan), "scan_descriptors: llp=0x%x\n", llp); list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { - if (desc->lli.llp == llp) + /* check first descriptors addr */ + if (desc->txd.phys == llp) { + spin_unlock_irqrestore(&dwc->lock, flags); + return; + } + + /* check first descriptors llp */ + if (desc->lli.llp == llp) { /* This one is currently in progress */ + spin_unlock_irqrestore(&dwc->lock, flags); return; + } list_for_each_entry(child, &desc->tx_list, desc_node) - if (child->lli.llp == llp) + if (child->lli.llp == llp) { /* Currently in progress */ + spin_unlock_irqrestore(&dwc->lock, flags); return; + } /* * No descriptors so far seem to be in progress, i.e. * this one must be done. 
*/ - dwc_descriptor_complete(dwc, desc); + spin_unlock_irqrestore(&dwc->lock, flags); + dwc_descriptor_complete(dwc, desc, true); + spin_lock_irqsave(&dwc->lock, flags); } dev_err(chan2dev(&dwc->chan), @@ -332,6 +362,7 @@ static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) list_move(dwc->queue.next, &dwc->active_list); dwc_dostart(dwc, dwc_first_active(dwc)); } + spin_unlock_irqrestore(&dwc->lock, flags); } static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli) @@ -346,9 +377,12 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) { struct dw_desc *bad_desc; struct dw_desc *child; + unsigned long flags; dwc_scan_descriptors(dw, dwc); + spin_lock_irqsave(&dwc->lock, flags); + /* * The descriptor currently at the head of the active list is * borked. Since we don't have any way to report errors, we'll @@ -378,8 +412,10 @@ static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) list_for_each_entry(child, &bad_desc->tx_list, desc_node) dwc_dump_lli(dwc, &child->lli); + spin_unlock_irqrestore(&dwc->lock, flags); + /* Pretend the descriptor completed successfully */ - dwc_descriptor_complete(dwc, bad_desc); + dwc_descriptor_complete(dwc, bad_desc, true); } /* --------------------- Cyclic DMA API extensions -------------------- */ @@ -402,6 +438,8 @@ EXPORT_SYMBOL(dw_dma_get_dst_addr); static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, u32 status_block, u32 status_err, u32 status_xfer) { + unsigned long flags; + if (status_block & dwc->mask) { void (*callback)(void *param); void *callback_param; @@ -412,11 +450,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, callback = dwc->cdesc->period_callback; callback_param = dwc->cdesc->period_callback_param; - if (callback) { - spin_unlock(&dwc->lock); + + if (callback) callback(callback_param); - spin_lock(&dwc->lock); - } } /* @@ -430,6 +466,9 @@ static void dwc_handle_cyclic(struct dw_dma *dw, 
struct dw_dma_chan *dwc, dev_err(chan2dev(&dwc->chan), "cyclic DMA unexpected %s " "interrupt, stopping DMA transfer\n", status_xfer ? "xfer" : "error"); + + spin_lock_irqsave(&dwc->lock, flags); + dev_err(chan2dev(&dwc->chan), " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n", channel_readl(dwc, SAR), @@ -453,6 +492,8 @@ static void dwc_handle_cyclic(struct dw_dma *dw, struct dw_dma_chan *dwc, for (i = 0; i < dwc->cdesc->periods; i++) dwc_dump_lli(dwc, &dwc->cdesc->desc[i]->lli); + + spin_unlock_irqrestore(&dwc->lock, flags); } } @@ -476,7 +517,6 @@ static void dw_dma_tasklet(unsigned long data) for (i = 0; i < dw->dma.chancnt; i++) { dwc = &dw->chan[i]; - spin_lock(&dwc->lock); if (test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) dwc_handle_cyclic(dw, dwc, status_block, status_err, status_xfer); @@ -484,7 +524,6 @@ static void dw_dma_tasklet(unsigned long data) dwc_handle_error(dw, dwc); else if ((status_block | status_xfer) & (1 << i)) dwc_scan_descriptors(dw, dwc); - spin_unlock(&dwc->lock); } /* @@ -539,8 +578,9 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) struct dw_desc *desc = txd_to_dw_desc(tx); struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); dma_cookie_t cookie; + unsigned long flags; - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); cookie = dwc_assign_cookie(dwc, desc); /* @@ -560,7 +600,7 @@ static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) list_add_tail(&desc->desc_node, &dwc->queue); } - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); return cookie; } @@ -689,9 +729,15 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg = dws->tx_reg; for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; - u32 len; - u32 mem; + u32 len, dlen, mem; + + mem = sg_phys(sg); + len = sg_dma_len(sg); + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; +slave_sg_todev_fill_desc: desc = dwc_desc_get(dwc); if (!desc) { dev_err(chan2dev(chan), @@ 
-699,16 +745,19 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, goto err_desc_get; } - mem = sg_phys(sg); - len = sg_dma_len(sg); - mem_width = 2; - if (unlikely(mem & 3 || len & 3)) - mem_width = 0; - desc->lli.sar = mem; desc->lli.dar = reg; desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width); - desc->lli.ctlhi = len >> mem_width; + if ((len >> mem_width) > DWC_MAX_COUNT) { + dlen = DWC_MAX_COUNT << mem_width; + mem += dlen; + len -= dlen; + } else { + dlen = len; + len = 0; + } + + desc->lli.ctlhi = dlen >> mem_width; if (!first) { first = desc; @@ -722,7 +771,10 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, &first->tx_list); } prev = desc; - total_len += len; + total_len += dlen; + + if (len) + goto slave_sg_todev_fill_desc; } break; case DMA_FROM_DEVICE: @@ -735,15 +787,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, reg = dws->rx_reg; for_each_sg(sgl, sg, sg_len, i) { struct dw_desc *desc; - u32 len; - u32 mem; - - desc = dwc_desc_get(dwc); - if (!desc) { - dev_err(chan2dev(chan), - "not enough descriptors available\n"); - goto err_desc_get; - } + u32 len, dlen, mem; mem = sg_phys(sg); len = sg_dma_len(sg); @@ -751,10 +795,26 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, if (unlikely(mem & 3 || len & 3)) mem_width = 0; +slave_sg_fromdev_fill_desc: + desc = dwc_desc_get(dwc); + if (!desc) { + dev_err(chan2dev(chan), + "not enough descriptors available\n"); + goto err_desc_get; + } + desc->lli.sar = reg; desc->lli.dar = mem; desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width); - desc->lli.ctlhi = len >> reg_width; + if ((len >> reg_width) > DWC_MAX_COUNT) { + dlen = DWC_MAX_COUNT << reg_width; + mem += dlen; + len -= dlen; + } else { + dlen = len; + len = 0; + } + desc->lli.ctlhi = dlen >> reg_width; if (!first) { first = desc; @@ -768,7 +828,10 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, &first->tx_list); } prev = desc; - total_len += len; 
+ total_len += dlen; + + if (len) + goto slave_sg_fromdev_fill_desc; } break; default: @@ -799,34 +862,51 @@ static int dwc_control(struct dma_chan *chan, enum dma_ctrl_cmd cmd, struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); struct dw_desc *desc, *_desc; + unsigned long flags; + u32 cfglo; LIST_HEAD(list); - /* Only supports DMA_TERMINATE_ALL */ - if (cmd != DMA_TERMINATE_ALL) - return -ENXIO; + if (cmd == DMA_PAUSE) { + spin_lock_irqsave(&dwc->lock, flags); - /* - * This is only called when something went wrong elsewhere, so - * we don't really care about the data. Just disable the - * channel. We still have to poll the channel enable bit due - * to AHB/HSB limitations. - */ - spin_lock_bh(&dwc->lock); + cfglo = channel_readl(dwc, CFG_LO); + channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP); + while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY)) + cpu_relax(); - channel_clear_bit(dw, CH_EN, dwc->mask); + dwc->paused = true; + spin_unlock_irqrestore(&dwc->lock, flags); + } else if (cmd == DMA_RESUME) { + if (!dwc->paused) + return 0; - while (dma_readl(dw, CH_EN) & dwc->mask) - cpu_relax(); + spin_lock_irqsave(&dwc->lock, flags); - /* active_list entries will end up before queued entries */ - list_splice_init(&dwc->queue, &list); - list_splice_init(&dwc->active_list, &list); + cfglo = channel_readl(dwc, CFG_LO); + channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP); + dwc->paused = false; - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); + } else if (cmd == DMA_TERMINATE_ALL) { + spin_lock_irqsave(&dwc->lock, flags); - /* Flush all pending and queued descriptors */ - list_for_each_entry_safe(desc, _desc, &list, desc_node) - dwc_descriptor_complete(dwc, desc); + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + + dwc->paused = false; + + /* active_list entries will end up before queued entries */ + list_splice_init(&dwc->queue, 
&list); + list_splice_init(&dwc->active_list, &list); + + spin_unlock_irqrestore(&dwc->lock, flags); + + /* Flush all pending and queued descriptors */ + list_for_each_entry_safe(desc, _desc, &list, desc_node) + dwc_descriptor_complete(dwc, desc, false); + } else + return -ENXIO; return 0; } @@ -846,9 +926,7 @@ dwc_tx_status(struct dma_chan *chan, ret = dma_async_is_complete(cookie, last_complete, last_used); if (ret != DMA_SUCCESS) { - spin_lock_bh(&dwc->lock); dwc_scan_descriptors(to_dw_dma(chan->device), dwc); - spin_unlock_bh(&dwc->lock); last_complete = dwc->completed; last_used = chan->cookie; @@ -856,7 +934,14 @@ dwc_tx_status(struct dma_chan *chan, ret = dma_async_is_complete(cookie, last_complete, last_used); } - dma_set_tx_state(txstate, last_complete, last_used, 0); + if (ret != DMA_SUCCESS) + dma_set_tx_state(txstate, last_complete, last_used, + dwc_first_active(dwc)->len); + else + dma_set_tx_state(txstate, last_complete, last_used, 0); + + if (dwc->paused) + return DMA_PAUSED; return ret; } @@ -865,10 +950,8 @@ static void dwc_issue_pending(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); - spin_lock_bh(&dwc->lock); if (!list_empty(&dwc->queue)) dwc_scan_descriptors(to_dw_dma(chan->device), dwc); - spin_unlock_bh(&dwc->lock); } static int dwc_alloc_chan_resources(struct dma_chan *chan) @@ -880,6 +963,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) int i; u32 cfghi; u32 cfglo; + unsigned long flags; dev_vdbg(chan2dev(chan), "alloc_chan_resources\n"); @@ -917,16 +1001,16 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) * doesn't mean what you think it means), and status writeback. 
*/ - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); i = dwc->descs_allocated; while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) { - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL); if (!desc) { dev_info(chan2dev(chan), "only allocated %d descriptors\n", i); - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); break; } @@ -938,7 +1022,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) sizeof(desc->lli), DMA_TO_DEVICE); dwc_desc_put(dwc, desc); - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); i = ++dwc->descs_allocated; } @@ -947,7 +1031,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan) channel_set_bit(dw, MASK.BLOCK, dwc->mask); channel_set_bit(dw, MASK.ERROR, dwc->mask); - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); dev_dbg(chan2dev(chan), "alloc_chan_resources allocated %d descriptors\n", i); @@ -960,6 +1044,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(chan->device); struct dw_desc *desc, *_desc; + unsigned long flags; LIST_HEAD(list); dev_dbg(chan2dev(chan), "free_chan_resources (descs allocated=%u)\n", @@ -970,7 +1055,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) BUG_ON(!list_empty(&dwc->queue)); BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask); - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); list_splice_init(&dwc->free_list, &list); dwc->descs_allocated = 0; @@ -979,7 +1064,7 @@ static void dwc_free_chan_resources(struct dma_chan *chan) channel_clear_bit(dw, MASK.BLOCK, dwc->mask); channel_clear_bit(dw, MASK.ERROR, dwc->mask); - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); list_for_each_entry_safe(desc, _desc, &list, desc_node) { dev_vdbg(chan2dev(chan), " freeing descriptor %p\n", desc); @@ -1004,13 
+1089,14 @@ int dw_dma_cyclic_start(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(dwc->chan.device); + unsigned long flags; if (!test_bit(DW_DMA_IS_CYCLIC, &dwc->flags)) { dev_err(chan2dev(&dwc->chan), "missing prep for cyclic DMA\n"); return -ENODEV; } - spin_lock(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); /* assert channel is idle */ if (dma_readl(dw, CH_EN) & dwc->mask) { @@ -1023,7 +1109,7 @@ int dw_dma_cyclic_start(struct dma_chan *chan) channel_readl(dwc, LLP), channel_readl(dwc, CTL_HI), channel_readl(dwc, CTL_LO)); - spin_unlock(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); return -EBUSY; } @@ -1038,7 +1124,7 @@ int dw_dma_cyclic_start(struct dma_chan *chan) channel_set_bit(dw, CH_EN, dwc->mask); - spin_unlock(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); return 0; } @@ -1054,14 +1140,15 @@ void dw_dma_cyclic_stop(struct dma_chan *chan) { struct dw_dma_chan *dwc = to_dw_dma_chan(chan); struct dw_dma *dw = to_dw_dma(dwc->chan.device); + unsigned long flags; - spin_lock(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); channel_clear_bit(dw, CH_EN, dwc->mask); while (dma_readl(dw, CH_EN) & dwc->mask) cpu_relax(); - spin_unlock(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); } EXPORT_SYMBOL(dw_dma_cyclic_stop); @@ -1090,17 +1177,18 @@ struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, unsigned int reg_width; unsigned int periods; unsigned int i; + unsigned long flags; - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); if (!list_empty(&dwc->queue) || !list_empty(&dwc->active_list)) { - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); dev_dbg(chan2dev(&dwc->chan), "queue and/or active list are not empty\n"); return ERR_PTR(-EBUSY); } was_cyclic = test_and_set_bit(DW_DMA_IS_CYCLIC, &dwc->flags); - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); if (was_cyclic) { 
dev_dbg(chan2dev(&dwc->chan), "channel already prepared for cyclic DMA\n"); @@ -1214,13 +1302,14 @@ void dw_dma_cyclic_free(struct dma_chan *chan) struct dw_dma *dw = to_dw_dma(dwc->chan.device); struct dw_cyclic_desc *cdesc = dwc->cdesc; int i; + unsigned long flags; dev_dbg(chan2dev(&dwc->chan), "cyclic free\n"); if (!cdesc) return; - spin_lock_bh(&dwc->lock); + spin_lock_irqsave(&dwc->lock, flags); channel_clear_bit(dw, CH_EN, dwc->mask); while (dma_readl(dw, CH_EN) & dwc->mask) @@ -1230,7 +1319,7 @@ void dw_dma_cyclic_free(struct dma_chan *chan) dma_writel(dw, CLEAR.ERROR, dwc->mask); dma_writel(dw, CLEAR.XFER, dwc->mask); - spin_unlock_bh(&dwc->lock); + spin_unlock_irqrestore(&dwc->lock, flags); for (i = 0; i < cdesc->periods; i++) dwc_desc_put(dwc, cdesc->desc[i]); @@ -1487,3 +1576,4 @@ module_exit(dw_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver"); MODULE_AUTHOR("Haavard Skinnemoen (Atmel)"); +MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>"); diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h index 720f821527f..c3419518d70 100644 --- a/drivers/dma/dw_dmac_regs.h +++ b/drivers/dma/dw_dmac_regs.h @@ -2,6 +2,7 @@ * Driver for the Synopsys DesignWare AHB DMA Controller * * Copyright (C) 2005-2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -138,6 +139,7 @@ struct dw_dma_chan { void __iomem *ch_regs; u8 mask; u8 priority; + bool paused; spinlock_t lock; diff --git a/drivers/dma/intel_mid_dma.c b/drivers/dma/intel_mid_dma.c index 3d4ec38b9b6..f653517ef74 100644 --- a/drivers/dma/intel_mid_dma.c +++ b/drivers/dma/intel_mid_dma.c @@ -1292,8 +1292,7 @@ static int __devinit intel_mid_dma_probe(struct pci_dev *pdev, if (err) goto err_dma; - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); + 
pm_runtime_put_noidle(&pdev->dev); pm_runtime_allow(&pdev->dev); return 0; @@ -1322,6 +1321,9 @@ err_enable_device: static void __devexit intel_mid_dma_remove(struct pci_dev *pdev) { struct middma_device *device = pci_get_drvdata(pdev); + + pm_runtime_get_noresume(&pdev->dev); + pm_runtime_forbid(&pdev->dev); middma_shutdown(pdev); pci_dev_put(pdev); kfree(device); @@ -1385,13 +1387,20 @@ int dma_resume(struct pci_dev *pci) static int dma_runtime_suspend(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - return dma_suspend(pci_dev, PMSG_SUSPEND); + struct middma_device *device = pci_get_drvdata(pci_dev); + + device->state = SUSPENDED; + return 0; } static int dma_runtime_resume(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); - return dma_resume(pci_dev); + struct middma_device *device = pci_get_drvdata(pci_dev); + + device->state = RUNNING; + iowrite32(REG_BIT0, device->dma_base + DMA_CFG); + return 0; } static int dma_runtime_idle(struct device *dev) diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index f4a51d4d034..5d65f837797 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -508,6 +508,7 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) struct ioat_ring_ent **ring; u64 status; int order; + int i = 0; /* have we already been set up? 
*/ if (ioat->ring) @@ -548,8 +549,11 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) ioat2_start_null_desc(ioat); /* check that we got off the ground */ - udelay(5); - status = ioat_chansts(chan); + do { + udelay(1); + status = ioat_chansts(chan); + } while (i++ < 20 && !is_ioat_active(status) && !is_ioat_idle(status)); + if (is_ioat_active(status) || is_ioat_idle(status)) { set_bit(IOAT_RUN, &chan->state); return 1 << ioat->alloc_order; diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index c6b01f535b2..e03f811a83d 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -619,7 +619,7 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); + BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", __func__, len); @@ -652,7 +652,7 @@ iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest, if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); + BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT); dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", __func__, len); @@ -686,7 +686,7 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT); dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u flags: %lx\n", diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index a25f5f61e0e..954e334e01b 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -671,7 +671,7 @@ mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) return NULL; - BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); spin_lock_bh(&mv_chan->lock); slot_cnt = mv_chan_memcpy_slot_count(len); @@ -710,7 +710,7 @@ mv_xor_prep_dma_memset(struct dma_chan *chan, 
dma_addr_t dest, int value, if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) return NULL; - BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); spin_lock_bh(&mv_chan->lock); slot_cnt = mv_chan_memset_slot_count(len); @@ -744,7 +744,7 @@ mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) return NULL; - BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > MV_XOR_MAX_BYTE_COUNT); dev_dbg(mv_chan->device->common.dev, "%s src_cnt: %d len: dest %x %u flags: %ld\n", diff --git a/drivers/dma/pch_dma.c b/drivers/dma/pch_dma.c index 8d8fef1480a..ff5b38f9d45 100644 --- a/drivers/dma/pch_dma.c +++ b/drivers/dma/pch_dma.c @@ -77,10 +77,10 @@ struct pch_dma_regs { u32 dma_ctl0; u32 dma_ctl1; u32 dma_ctl2; - u32 reserved1; + u32 dma_ctl3; u32 dma_sts0; u32 dma_sts1; - u32 reserved2; + u32 dma_sts2; u32 reserved3; struct pch_dma_desc_regs desc[MAX_CHAN_NR]; }; @@ -130,6 +130,7 @@ struct pch_dma { #define PCH_DMA_CTL0 0x00 #define PCH_DMA_CTL1 0x04 #define PCH_DMA_CTL2 0x08 +#define PCH_DMA_CTL3 0x0C #define PCH_DMA_STS0 0x10 #define PCH_DMA_STS1 0x14 @@ -138,7 +139,8 @@ struct pch_dma { #define dma_writel(pd, name, val) \ writel((val), (pd)->membase + PCH_DMA_##name) -static inline struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd) +static inline +struct pch_dma_desc *to_pd_desc(struct dma_async_tx_descriptor *txd) { return container_of(txd, struct pch_dma_desc, txd); } @@ -163,13 +165,15 @@ static inline struct device *chan2parent(struct dma_chan *chan) return chan->dev->device.parent; } -static inline struct pch_dma_desc *pdc_first_active(struct pch_dma_chan *pd_chan) +static inline +struct pch_dma_desc *pdc_first_active(struct pch_dma_chan *pd_chan) { return list_first_entry(&pd_chan->active_list, struct pch_dma_desc, desc_node); } -static inline struct pch_dma_desc *pdc_first_queued(struct pch_dma_chan *pd_chan) +static inline +struct pch_dma_desc 
*pdc_first_queued(struct pch_dma_chan *pd_chan) { return list_first_entry(&pd_chan->queue, struct pch_dma_desc, desc_node); @@ -199,16 +203,30 @@ static void pdc_set_dir(struct dma_chan *chan) struct pch_dma *pd = to_pd(chan->device); u32 val; - val = dma_readl(pd, CTL0); + if (chan->chan_id < 8) { + val = dma_readl(pd, CTL0); - if (pd_chan->dir == DMA_TO_DEVICE) - val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + - DMA_CTL0_DIR_SHIFT_BITS); - else - val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + - DMA_CTL0_DIR_SHIFT_BITS)); + if (pd_chan->dir == DMA_TO_DEVICE) + val |= 0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + + DMA_CTL0_DIR_SHIFT_BITS); + else + val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * chan->chan_id + + DMA_CTL0_DIR_SHIFT_BITS)); + + dma_writel(pd, CTL0, val); + } else { + int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... ch11->3 */ + val = dma_readl(pd, CTL3); - dma_writel(pd, CTL0, val); + if (pd_chan->dir == DMA_TO_DEVICE) + val |= 0x1 << (DMA_CTL0_BITS_PER_CH * ch + + DMA_CTL0_DIR_SHIFT_BITS); + else + val &= ~(0x1 << (DMA_CTL0_BITS_PER_CH * ch + + DMA_CTL0_DIR_SHIFT_BITS)); + + dma_writel(pd, CTL3, val); + } dev_dbg(chan2dev(chan), "pdc_set_dir: chan %d -> %x\n", chan->chan_id, val); @@ -219,13 +237,26 @@ static void pdc_set_mode(struct dma_chan *chan, u32 mode) struct pch_dma *pd = to_pd(chan->device); u32 val; - val = dma_readl(pd, CTL0); + if (chan->chan_id < 8) { + val = dma_readl(pd, CTL0); + + val &= ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * chan->chan_id)); + val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id); - val &= ~(DMA_CTL0_MODE_MASK_BITS << - (DMA_CTL0_BITS_PER_CH * chan->chan_id)); - val |= mode << (DMA_CTL0_BITS_PER_CH * chan->chan_id); + dma_writel(pd, CTL0, val); + } else { + int ch = chan->chan_id - 8; /* ch8-->0 ch9-->1 ... 
ch11->3 */ + + val = dma_readl(pd, CTL3); + + val &= ~(DMA_CTL0_MODE_MASK_BITS << + (DMA_CTL0_BITS_PER_CH * ch)); + val |= mode << (DMA_CTL0_BITS_PER_CH * ch); - dma_writel(pd, CTL0, val); + dma_writel(pd, CTL3, val); + + } dev_dbg(chan2dev(chan), "pdc_set_mode: chan %d -> %x\n", chan->chan_id, val); @@ -251,9 +282,6 @@ static bool pdc_is_idle(struct pch_dma_chan *pd_chan) static void pdc_dostart(struct pch_dma_chan *pd_chan, struct pch_dma_desc* desc) { - struct pch_dma *pd = to_pd(pd_chan->chan.device); - u32 val; - if (!pdc_is_idle(pd_chan)) { dev_err(chan2dev(&pd_chan->chan), "BUG: Attempt to start non-idle channel\n"); @@ -279,10 +307,6 @@ static void pdc_dostart(struct pch_dma_chan *pd_chan, struct pch_dma_desc* desc) channel_writel(pd_chan, NEXT, desc->txd.phys); pdc_set_mode(&pd_chan->chan, DMA_CTL0_SG); } - - val = dma_readl(pd, CTL2); - val |= 1 << (DMA_CTL2_START_SHIFT_BITS + pd_chan->chan.chan_id); - dma_writel(pd, CTL2, val); } static void pdc_chain_complete(struct pch_dma_chan *pd_chan, @@ -403,7 +427,7 @@ static struct pch_dma_desc *pdc_desc_get(struct pch_dma_chan *pd_chan) { struct pch_dma_desc *desc, *_d; struct pch_dma_desc *ret = NULL; - int i; + int i = 0; spin_lock(&pd_chan->lock); list_for_each_entry_safe(desc, _d, &pd_chan->free_list, desc_node) { @@ -478,7 +502,6 @@ static int pd_alloc_chan_resources(struct dma_chan *chan) spin_unlock_bh(&pd_chan->lock); pdc_enable_irq(chan, 1); - pdc_set_dir(chan); return pd_chan->descs_allocated; } @@ -561,6 +584,9 @@ static struct dma_async_tx_descriptor *pd_prep_slave_sg(struct dma_chan *chan, else return NULL; + pd_chan->dir = direction; + pdc_set_dir(chan); + for_each_sg(sgl, sg, sg_len, i) { desc = pdc_desc_get(pd_chan); @@ -703,6 +729,7 @@ static void pch_dma_save_regs(struct pch_dma *pd) pd->regs.dma_ctl0 = dma_readl(pd, CTL0); pd->regs.dma_ctl1 = dma_readl(pd, CTL1); pd->regs.dma_ctl2 = dma_readl(pd, CTL2); + pd->regs.dma_ctl3 = dma_readl(pd, CTL3); list_for_each_entry_safe(chan, _c, 
&pd->dma.channels, device_node) { pd_chan = to_pd_chan(chan); @@ -725,6 +752,7 @@ static void pch_dma_restore_regs(struct pch_dma *pd) dma_writel(pd, CTL0, pd->regs.dma_ctl0); dma_writel(pd, CTL1, pd->regs.dma_ctl1); dma_writel(pd, CTL2, pd->regs.dma_ctl2); + dma_writel(pd, CTL3, pd->regs.dma_ctl3); list_for_each_entry_safe(chan, _c, &pd->dma.channels, device_node) { pd_chan = to_pd_chan(chan); @@ -850,8 +878,6 @@ static int __devinit pch_dma_probe(struct pci_dev *pdev, pd_chan->membase = ®s->desc[i]; - pd_chan->dir = (i % 2) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; - spin_lock_init(&pd_chan->lock); INIT_LIST_HEAD(&pd_chan->active_list); @@ -929,13 +955,23 @@ static void __devexit pch_dma_remove(struct pci_dev *pdev) #define PCI_DEVICE_ID_ML7213_DMA1_8CH 0x8026 #define PCI_DEVICE_ID_ML7213_DMA2_8CH 0x802B #define PCI_DEVICE_ID_ML7213_DMA3_4CH 0x8034 +#define PCI_DEVICE_ID_ML7213_DMA4_12CH 0x8032 +#define PCI_DEVICE_ID_ML7223_DMA1_4CH 0x800B +#define PCI_DEVICE_ID_ML7223_DMA2_4CH 0x800E +#define PCI_DEVICE_ID_ML7223_DMA3_4CH 0x8017 +#define PCI_DEVICE_ID_ML7223_DMA4_4CH 0x803B -static const struct pci_device_id pch_dma_id_table[] = { +DEFINE_PCI_DEVICE_TABLE(pch_dma_id_table) = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_8CH), 8 }, { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_EG20T_PCH_DMA_4CH), 4 }, { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA1_8CH), 8}, /* UART Video */ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA2_8CH), 8}, /* PCMIF SPI */ { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA3_4CH), 4}, /* FPGA */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7213_DMA4_12CH), 12}, /* I2S */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA1_4CH), 4}, /* UART */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA2_4CH), 4}, /* Video SPI */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA3_4CH), 4}, /* Security */ + { PCI_VDEVICE(ROHM, PCI_DEVICE_ID_ML7223_DMA4_4CH), 4}, /* FPGA */ { 0, }, }; diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index 3b0247e74cc..fc457a7e883 100644 --- 
a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -2313,7 +2313,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memcpy( if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT); spin_lock_bh(&ppc440spe_chan->lock); @@ -2354,7 +2354,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_memset( if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_DMA_MAX_BYTE_COUNT); spin_lock_bh(&ppc440spe_chan->lock); @@ -2397,7 +2397,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_xor( dma_dest, dma_src, src_cnt)); if (unlikely(!len)) return NULL; - BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); dev_dbg(ppc440spe_chan->device->common.dev, "ppc440spe adma%d: %s src_cnt: %d len: %u int_en: %d\n", @@ -2887,7 +2887,7 @@ static struct dma_async_tx_descriptor *ppc440spe_adma_prep_dma_pq( ADMA_LL_DBG(prep_dma_pq_dbg(ppc440spe_chan->device->id, dst, src, src_cnt)); BUG_ON(!len); - BUG_ON(unlikely(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT)); + BUG_ON(len > PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT); BUG_ON(!src_cnt); if (src_cnt == 1 && dst[1] == src[0]) { diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index 636e40925b1..2a638f9f09a 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -343,7 +343,7 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) dmae_set_dmars(sh_chan, cfg->mid_rid); dmae_set_chcr(sh_chan, cfg->chcr); - } else if ((sh_dmae_readl(sh_chan, CHCR) & 0xf00) != 0x400) { + } else { dmae_init(sh_chan); } @@ -1144,6 +1144,8 @@ static int __init sh_dmae_probe(struct platform_device *pdev) /* platform data */ shdev->pdata = pdata; + platform_set_drvdata(pdev, shdev); + pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); @@ -1256,7 +1258,6 @@ static int __init 
sh_dmae_probe(struct platform_device *pdev) pm_runtime_put(&pdev->dev); - platform_set_drvdata(pdev, shdev); dma_async_device_register(&shdev->common); return err; @@ -1278,6 +1279,8 @@ rst_err: if (dmars) iounmap(shdev->dmars); + + platform_set_drvdata(pdev, NULL); emapdmars: iounmap(shdev->chan_reg); synchronize_rcu(); @@ -1316,6 +1319,8 @@ static int __exit sh_dmae_remove(struct platform_device *pdev) iounmap(shdev->dmars); iounmap(shdev->chan_reg); + platform_set_drvdata(pdev, NULL); + synchronize_rcu(); kfree(shdev); diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c index 94ee15dd3ae..8f222d4db7d 100644 --- a/drivers/dma/ste_dma40.c +++ b/drivers/dma/ste_dma40.c @@ -1829,7 +1829,7 @@ d40_get_dev_addr(struct d40_chan *chan, enum dma_data_direction direction) { struct stedma40_platform_data *plat = chan->base->plat_data; struct stedma40_chan_cfg *cfg = &chan->dma_cfg; - dma_addr_t addr; + dma_addr_t addr = 0; if (chan->runtime_addr) return chan->runtime_addr; @@ -2962,4 +2962,4 @@ static int __init stedma40_init(void) { return platform_driver_probe(&d40_driver, d40_probe); } -arch_initcall(stedma40_init); +subsys_initcall(stedma40_init); diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index d2136460375..4a7f6314345 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -86,6 +86,34 @@ config GPIO_IT8761E help Say yes here to support GPIO functionality of IT8761E super I/O chip. 
+config GPIO_EXYNOS4 + bool "Samsung Exynos4 GPIO library support" + default y if CPU_EXYNOS4210 + depends on ARM + help + Say yes here to support Samsung Exynos4 series SoCs GPIO library + +config GPIO_PLAT_SAMSUNG + bool "Samsung SoCs GPIO library support" + default y if SAMSUNG_GPIOLIB_4BIT + depends on ARM + help + Say yes here to support Samsung SoCs GPIO library + +config GPIO_S5PC100 + bool "Samsung S5PC100 GPIO library support" + default y if CPU_S5PC100 + depends on ARM + help + Say yes here to support Samsung S5PC100 SoCs GPIO library + +config GPIO_S5PV210 + bool "Samsung S5PV210/S5PC110 GPIO library support" + default y if CPU_S5PV210 + depends on ARM + help + Say yes here to support Samsung S5PV210/S5PC110 SoCs GPIO library + config GPIO_PL061 bool "PrimeCell PL061 GPIO support" depends on ARM_AMBA @@ -303,7 +331,7 @@ comment "PCI GPIO expanders:" config GPIO_CS5535 tristate "AMD CS5535/CS5536 GPIO support" - depends on PCI && X86 && !CS5535_GPIO + depends on PCI && X86 && !CS5535_GPIO && MFD_CS5535 help The AMD CS5535 and CS5536 southbridges support 28 GPIO pins that can be used for quite a number of things. The CS5535/6 is found on @@ -334,13 +362,19 @@ config GPIO_LANGWELL Say Y here to support Intel Langwell/Penwell GPIO. config GPIO_PCH - tristate "PCH GPIO of Intel Topcliff" + tristate "Intel EG20T PCH / OKI SEMICONDUCTOR ML7223 IOH GPIO" depends on PCI && X86 help This driver is for PCH(Platform controller Hub) GPIO of Intel Topcliff which is an IOH(Input/Output Hub) for x86 embedded processor. This driver can access PCH GPIO device. + This driver also can be used for OKI SEMICONDUCTOR IOH(Input/ + Output Hub), ML7223. + ML7223 IOH is for MP(Media Phone) use. + ML7223 is companion chip for Intel Atom E6xx series. + ML7223 is completely compatible for Intel EG20T PCH. 
+ config GPIO_ML_IOH tristate "OKI SEMICONDUCTOR ML7213 IOH GPIO support" depends on PCI diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 6a3387acc0e..b605f8ec6fb 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -8,6 +8,10 @@ obj-$(CONFIG_GPIO_ADP5520) += adp5520-gpio.o obj-$(CONFIG_GPIO_ADP5588) += adp5588-gpio.o obj-$(CONFIG_GPIO_BASIC_MMIO_CORE) += basic_mmio_gpio.o obj-$(CONFIG_GPIO_BASIC_MMIO) += basic_mmio_gpio.o +obj-$(CONFIG_GPIO_EXYNOS4) += gpio-exynos4.o +obj-$(CONFIG_GPIO_PLAT_SAMSUNG) += gpio-plat-samsung.o +obj-$(CONFIG_GPIO_S5PC100) += gpio-s5pc100.o +obj-$(CONFIG_GPIO_S5PV210) += gpio-s5pv210.o obj-$(CONFIG_GPIO_LANGWELL) += langwell_gpio.o obj-$(CONFIG_GPIO_MAX730X) += max730x.o obj-$(CONFIG_GPIO_MAX7300) += max7300.o @@ -16,6 +20,7 @@ obj-$(CONFIG_GPIO_MAX732X) += max732x.o obj-$(CONFIG_GPIO_MC33880) += mc33880.o obj-$(CONFIG_GPIO_MCP23S08) += mcp23s08.o obj-$(CONFIG_GPIO_74X164) += 74x164.o +obj-$(CONFIG_ARCH_OMAP) += gpio-omap.o obj-$(CONFIG_GPIO_PCA953X) += pca953x.o obj-$(CONFIG_GPIO_PCF857X) += pcf857x.o obj-$(CONFIG_GPIO_PCH) += pch_gpio.o @@ -34,6 +39,8 @@ obj-$(CONFIG_GPIO_WM831X) += wm831x-gpio.o obj-$(CONFIG_GPIO_WM8350) += wm8350-gpiolib.o obj-$(CONFIG_GPIO_WM8994) += wm8994-gpio.o obj-$(CONFIG_GPIO_SCH) += sch_gpio.o +obj-$(CONFIG_MACH_U300) += gpio-u300.o +obj-$(CONFIG_PLAT_NOMADIK) += gpio-nomadik.o obj-$(CONFIG_GPIO_RDC321X) += rdc321x-gpio.o obj-$(CONFIG_GPIO_JANZ_TTL) += janz-ttl.o obj-$(CONFIG_GPIO_SX150X) += sx150x.o diff --git a/arch/arm/mach-exynos4/gpiolib.c b/drivers/gpio/gpio-exynos4.c index d54ca6adb66..d54ca6adb66 100644 --- a/arch/arm/mach-exynos4/gpiolib.c +++ b/drivers/gpio/gpio-exynos4.c diff --git a/arch/arm/plat-nomadik/gpio.c b/drivers/gpio/gpio-nomadik.c index 307b8131aa8..4961ef9bc15 100644 --- a/arch/arm/plat-nomadik/gpio.c +++ b/drivers/gpio/gpio-nomadik.c @@ -57,6 +57,7 @@ struct nmk_gpio_chip { u32 fwimsc; u32 slpm; u32 enabled; + u32 pull_up; }; static struct nmk_gpio_chip * 
@@ -103,16 +104,22 @@ static void __nmk_gpio_set_pull(struct nmk_gpio_chip *nmk_chip, u32 pdis; pdis = readl(nmk_chip->addr + NMK_GPIO_PDIS); - if (pull == NMK_GPIO_PULL_NONE) + if (pull == NMK_GPIO_PULL_NONE) { pdis |= bit; - else + nmk_chip->pull_up &= ~bit; + } else { pdis &= ~bit; + } + writel(pdis, nmk_chip->addr + NMK_GPIO_PDIS); - if (pull == NMK_GPIO_PULL_UP) + if (pull == NMK_GPIO_PULL_UP) { + nmk_chip->pull_up |= bit; writel(bit, nmk_chip->addr + NMK_GPIO_DATS); - else if (pull == NMK_GPIO_PULL_DOWN) + } else if (pull == NMK_GPIO_PULL_DOWN) { + nmk_chip->pull_up &= ~bit; writel(bit, nmk_chip->addr + NMK_GPIO_DATC); + } } static void __nmk_gpio_make_input(struct nmk_gpio_chip *nmk_chip, @@ -811,20 +818,43 @@ static void nmk_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) bool pull; u32 bit = 1 << i; - if (!label) - continue; - is_out = readl(nmk_chip->addr + NMK_GPIO_DIR) & bit; pull = !(readl(nmk_chip->addr + NMK_GPIO_PDIS) & bit); mode = nmk_gpio_get_mode(gpio); seq_printf(s, " gpio-%-3d (%-20.20s) %s %s %s %s", - gpio, label, + gpio, label ?: "(none)", is_out ? "out" : "in ", chip->get ? (chip->get(chip, i) ? "hi" : "lo") : "? ", (mode < 0) ? "unknown" : modes[mode], pull ? "pull" : "none"); + + if (label && !is_out) { + int irq = gpio_to_irq(gpio); + struct irq_desc *desc = irq_to_desc(irq); + + /* This races with request_irq(), set_irq_type(), + * and set_irq_wake() ... but those are "rare". + */ + if (irq >= 0 && desc->action) { + char *trigger; + u32 bitmask = nmk_gpio_get_bitmask(gpio); + + if (nmk_chip->edge_rising & bitmask) + trigger = "edge-rising"; + else if (nmk_chip->edge_falling & bitmask) + trigger = "edge-falling"; + else + trigger = "edge-undefined"; + + seq_printf(s, " irq-%d %s%s", + irq, trigger, + irqd_is_wakeup_set(&desc->irq_data) + ? " wakeup" : ""); + } + } + seq_printf(s, "\n"); } } @@ -898,6 +928,25 @@ void nmk_gpio_wakeups_resume(void) } } +/* + * Read the pull up/pull down status. 
+ * A bit set in 'pull_up' means that pull up + * is selected if pull is enabled in PDIS register. + * Note: only pull up/down set via this driver can + * be detected due to HW limitations. + */ +void nmk_gpio_read_pull(int gpio_bank, u32 *pull_up) +{ + if (gpio_bank < NUM_BANKS) { + struct nmk_gpio_chip *chip = nmk_gpio_chips[gpio_bank]; + + if (!chip) + return; + + *pull_up = chip->pull_up; + } +} + static int __devinit nmk_gpio_probe(struct platform_device *dev) { struct nmk_gpio_platform_data *pdata = dev->dev.platform_data; diff --git a/arch/arm/plat-omap/gpio.c b/drivers/gpio/gpio-omap.c index efb86939019..6c51191da56 100644 --- a/arch/arm/plat-omap/gpio.c +++ b/drivers/gpio/gpio-omap.c @@ -1,6 +1,4 @@ /* - * linux/arch/arm/plat-omap/gpio.c - * * Support functions for OMAP GPIO * * Copyright (C) 2003-2005 Nokia Corporation @@ -30,109 +28,6 @@ #include <mach/gpio.h> #include <asm/mach/irq.h> -/* - * OMAP1510 GPIO registers - */ -#define OMAP1510_GPIO_DATA_INPUT 0x00 -#define OMAP1510_GPIO_DATA_OUTPUT 0x04 -#define OMAP1510_GPIO_DIR_CONTROL 0x08 -#define OMAP1510_GPIO_INT_CONTROL 0x0c -#define OMAP1510_GPIO_INT_MASK 0x10 -#define OMAP1510_GPIO_INT_STATUS 0x14 -#define OMAP1510_GPIO_PIN_CONTROL 0x18 - -#define OMAP1510_IH_GPIO_BASE 64 - -/* - * OMAP1610 specific GPIO registers - */ -#define OMAP1610_GPIO_REVISION 0x0000 -#define OMAP1610_GPIO_SYSCONFIG 0x0010 -#define OMAP1610_GPIO_SYSSTATUS 0x0014 -#define OMAP1610_GPIO_IRQSTATUS1 0x0018 -#define OMAP1610_GPIO_IRQENABLE1 0x001c -#define OMAP1610_GPIO_WAKEUPENABLE 0x0028 -#define OMAP1610_GPIO_DATAIN 0x002c -#define OMAP1610_GPIO_DATAOUT 0x0030 -#define OMAP1610_GPIO_DIRECTION 0x0034 -#define OMAP1610_GPIO_EDGE_CTRL1 0x0038 -#define OMAP1610_GPIO_EDGE_CTRL2 0x003c -#define OMAP1610_GPIO_CLEAR_IRQENABLE1 0x009c -#define OMAP1610_GPIO_CLEAR_WAKEUPENA 0x00a8 -#define OMAP1610_GPIO_CLEAR_DATAOUT 0x00b0 -#define OMAP1610_GPIO_SET_IRQENABLE1 0x00dc -#define OMAP1610_GPIO_SET_WAKEUPENA 0x00e8 -#define 
OMAP1610_GPIO_SET_DATAOUT 0x00f0 - -/* - * OMAP7XX specific GPIO registers - */ -#define OMAP7XX_GPIO_DATA_INPUT 0x00 -#define OMAP7XX_GPIO_DATA_OUTPUT 0x04 -#define OMAP7XX_GPIO_DIR_CONTROL 0x08 -#define OMAP7XX_GPIO_INT_CONTROL 0x0c -#define OMAP7XX_GPIO_INT_MASK 0x10 -#define OMAP7XX_GPIO_INT_STATUS 0x14 - -/* - * omap2+ specific GPIO registers - */ -#define OMAP24XX_GPIO_REVISION 0x0000 -#define OMAP24XX_GPIO_IRQSTATUS1 0x0018 -#define OMAP24XX_GPIO_IRQSTATUS2 0x0028 -#define OMAP24XX_GPIO_IRQENABLE2 0x002c -#define OMAP24XX_GPIO_IRQENABLE1 0x001c -#define OMAP24XX_GPIO_WAKE_EN 0x0020 -#define OMAP24XX_GPIO_CTRL 0x0030 -#define OMAP24XX_GPIO_OE 0x0034 -#define OMAP24XX_GPIO_DATAIN 0x0038 -#define OMAP24XX_GPIO_DATAOUT 0x003c -#define OMAP24XX_GPIO_LEVELDETECT0 0x0040 -#define OMAP24XX_GPIO_LEVELDETECT1 0x0044 -#define OMAP24XX_GPIO_RISINGDETECT 0x0048 -#define OMAP24XX_GPIO_FALLINGDETECT 0x004c -#define OMAP24XX_GPIO_DEBOUNCE_EN 0x0050 -#define OMAP24XX_GPIO_DEBOUNCE_VAL 0x0054 -#define OMAP24XX_GPIO_CLEARIRQENABLE1 0x0060 -#define OMAP24XX_GPIO_SETIRQENABLE1 0x0064 -#define OMAP24XX_GPIO_CLEARWKUENA 0x0080 -#define OMAP24XX_GPIO_SETWKUENA 0x0084 -#define OMAP24XX_GPIO_CLEARDATAOUT 0x0090 -#define OMAP24XX_GPIO_SETDATAOUT 0x0094 - -#define OMAP4_GPIO_REVISION 0x0000 -#define OMAP4_GPIO_EOI 0x0020 -#define OMAP4_GPIO_IRQSTATUSRAW0 0x0024 -#define OMAP4_GPIO_IRQSTATUSRAW1 0x0028 -#define OMAP4_GPIO_IRQSTATUS0 0x002c -#define OMAP4_GPIO_IRQSTATUS1 0x0030 -#define OMAP4_GPIO_IRQSTATUSSET0 0x0034 -#define OMAP4_GPIO_IRQSTATUSSET1 0x0038 -#define OMAP4_GPIO_IRQSTATUSCLR0 0x003c -#define OMAP4_GPIO_IRQSTATUSCLR1 0x0040 -#define OMAP4_GPIO_IRQWAKEN0 0x0044 -#define OMAP4_GPIO_IRQWAKEN1 0x0048 -#define OMAP4_GPIO_IRQENABLE1 0x011c -#define OMAP4_GPIO_WAKE_EN 0x0120 -#define OMAP4_GPIO_IRQSTATUS2 0x0128 -#define OMAP4_GPIO_IRQENABLE2 0x012c -#define OMAP4_GPIO_CTRL 0x0130 -#define OMAP4_GPIO_OE 0x0134 -#define OMAP4_GPIO_DATAIN 0x0138 -#define OMAP4_GPIO_DATAOUT 0x013c 
-#define OMAP4_GPIO_LEVELDETECT0 0x0140 -#define OMAP4_GPIO_LEVELDETECT1 0x0144 -#define OMAP4_GPIO_RISINGDETECT 0x0148 -#define OMAP4_GPIO_FALLINGDETECT 0x014c -#define OMAP4_GPIO_DEBOUNCENABLE 0x0150 -#define OMAP4_GPIO_DEBOUNCINGTIME 0x0154 -#define OMAP4_GPIO_CLEARIRQENABLE1 0x0160 -#define OMAP4_GPIO_SETIRQENABLE1 0x0164 -#define OMAP4_GPIO_CLEARWKUENA 0x0180 -#define OMAP4_GPIO_SETWKUENA 0x0184 -#define OMAP4_GPIO_CLEARDATAOUT 0x0190 -#define OMAP4_GPIO_SETDATAOUT 0x0194 - struct gpio_bank { unsigned long pbase; void __iomem *base; diff --git a/arch/arm/plat-samsung/gpiolib.c b/drivers/gpio/gpio-plat-samsung.c index ea37c046178..ea37c046178 100644 --- a/arch/arm/plat-samsung/gpiolib.c +++ b/drivers/gpio/gpio-plat-samsung.c diff --git a/arch/arm/mach-s5pc100/gpiolib.c b/drivers/gpio/gpio-s5pc100.c index 2842394b28b..2842394b28b 100644 --- a/arch/arm/mach-s5pc100/gpiolib.c +++ b/drivers/gpio/gpio-s5pc100.c diff --git a/arch/arm/mach-s5pv210/gpiolib.c b/drivers/gpio/gpio-s5pv210.c index 1ba20a703e0..1ba20a703e0 100644 --- a/arch/arm/mach-s5pv210/gpiolib.c +++ b/drivers/gpio/gpio-s5pv210.c diff --git a/arch/arm/mach-u300/gpio.c b/drivers/gpio/gpio-u300.c index d92790140fe..d92790140fe 100644 --- a/arch/arm/mach-u300/gpio.c +++ b/drivers/gpio/gpio-u300.c diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 137a8ca6782..a971e3d043b 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -1296,7 +1296,7 @@ EXPORT_SYMBOL_GPL(gpio_request_one); * @array: array of the 'struct gpio' * @num: how many GPIOs in the array */ -int gpio_request_array(struct gpio *array, size_t num) +int gpio_request_array(const struct gpio *array, size_t num) { int i, err; @@ -1319,7 +1319,7 @@ EXPORT_SYMBOL_GPL(gpio_request_array); * @array: array of the 'struct gpio' * @num: how many GPIOs in the array */ -void gpio_free_array(struct gpio *array, size_t num) +void gpio_free_array(const struct gpio *array, size_t num) { while (num--) gpio_free((array++)->gpio); diff 
--git a/drivers/gpio/langwell_gpio.c b/drivers/gpio/langwell_gpio.c index 1b06f67e1f6..bd6571e0097 100644 --- a/drivers/gpio/langwell_gpio.c +++ b/drivers/gpio/langwell_gpio.c @@ -33,6 +33,7 @@ #include <linux/io.h> #include <linux/gpio.h> #include <linux/slab.h> +#include <linux/pm_runtime.h> /* * Langwell chip has 64 pins and thus there are 2 32bit registers to control @@ -63,6 +64,7 @@ struct lnw_gpio { void *reg_base; spinlock_t lock; unsigned irq_base; + struct pci_dev *pdev; }; static void __iomem *gpio_reg(struct gpio_chip *chip, unsigned offset, @@ -104,11 +106,18 @@ static int lnw_gpio_direction_input(struct gpio_chip *chip, unsigned offset) u32 value; unsigned long flags; + if (lnw->pdev) + pm_runtime_get(&lnw->pdev->dev); + spin_lock_irqsave(&lnw->lock, flags); value = readl(gpdr); value &= ~BIT(offset % 32); writel(value, gpdr); spin_unlock_irqrestore(&lnw->lock, flags); + + if (lnw->pdev) + pm_runtime_put(&lnw->pdev->dev); + return 0; } @@ -120,11 +129,19 @@ static int lnw_gpio_direction_output(struct gpio_chip *chip, unsigned long flags; lnw_gpio_set(chip, offset, value); + + if (lnw->pdev) + pm_runtime_get(&lnw->pdev->dev); + spin_lock_irqsave(&lnw->lock, flags); value = readl(gpdr); value |= BIT(offset % 32); writel(value, gpdr); spin_unlock_irqrestore(&lnw->lock, flags); + + if (lnw->pdev) + pm_runtime_put(&lnw->pdev->dev); + return 0; } @@ -145,6 +162,10 @@ static int lnw_irq_type(struct irq_data *d, unsigned type) if (gpio >= lnw->chip.ngpio) return -EINVAL; + + if (lnw->pdev) + pm_runtime_get(&lnw->pdev->dev); + spin_lock_irqsave(&lnw->lock, flags); if (type & IRQ_TYPE_EDGE_RISING) value = readl(grer) | BIT(gpio % 32); @@ -159,6 +180,9 @@ static int lnw_irq_type(struct irq_data *d, unsigned type) writel(value, gfer); spin_unlock_irqrestore(&lnw->lock, flags); + if (lnw->pdev) + pm_runtime_put(&lnw->pdev->dev); + return 0; } @@ -211,6 +235,39 @@ static void lnw_irq_handler(unsigned irq, struct irq_desc *desc) chip->irq_eoi(data); } +#ifdef 
CONFIG_PM +static int lnw_gpio_runtime_resume(struct device *dev) +{ + return 0; +} + +static int lnw_gpio_runtime_suspend(struct device *dev) +{ + return 0; +} + +static int lnw_gpio_runtime_idle(struct device *dev) +{ + int err = pm_schedule_suspend(dev, 500); + + if (!err) + return 0; + + return -EBUSY; +} + +#else +#define lnw_gpio_runtime_suspend NULL +#define lnw_gpio_runtime_resume NULL +#define lnw_gpio_runtime_idle NULL +#endif + +static const struct dev_pm_ops lnw_gpio_pm_ops = { + .runtime_suspend = lnw_gpio_runtime_suspend, + .runtime_resume = lnw_gpio_runtime_resume, + .runtime_idle = lnw_gpio_runtime_idle, +}; + static int __devinit lnw_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -270,6 +327,7 @@ static int __devinit lnw_gpio_probe(struct pci_dev *pdev, lnw->chip.base = gpio_base; lnw->chip.ngpio = id->driver_data; lnw->chip.can_sleep = 0; + lnw->pdev = pdev; pci_set_drvdata(pdev, lnw); retval = gpiochip_add(&lnw->chip); if (retval) { @@ -285,6 +343,10 @@ static int __devinit lnw_gpio_probe(struct pci_dev *pdev, } spin_lock_init(&lnw->lock); + + pm_runtime_put_noidle(&pdev->dev); + pm_runtime_allow(&pdev->dev); + goto done; err5: kfree(lnw); @@ -302,6 +364,9 @@ static struct pci_driver lnw_gpio_driver = { .name = "langwell_gpio", .id_table = lnw_gpio_ids, .probe = lnw_gpio_probe, + .driver = { + .pm = &lnw_gpio_pm_ops, + }, }; diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c index 78a843947d8..0451d7ac94a 100644 --- a/drivers/gpio/pca953x.c +++ b/drivers/gpio/pca953x.c @@ -24,33 +24,46 @@ #include <linux/of_gpio.h> #endif -#define PCA953X_INPUT 0 -#define PCA953X_OUTPUT 1 -#define PCA953X_INVERT 2 -#define PCA953X_DIRECTION 3 - -#define PCA953X_GPIOS 0x00FF -#define PCA953X_INT 0x0100 +#define PCA953X_INPUT 0 +#define PCA953X_OUTPUT 1 +#define PCA953X_INVERT 2 +#define PCA953X_DIRECTION 3 + +#define PCA957X_IN 0 +#define PCA957X_INVRT 1 +#define PCA957X_BKEN 2 +#define PCA957X_PUPD 3 +#define PCA957X_CFG 4 
+#define PCA957X_OUT 5 +#define PCA957X_MSK 6 +#define PCA957X_INTS 7 + +#define PCA_GPIO_MASK 0x00FF +#define PCA_INT 0x0100 +#define PCA953X_TYPE 0x1000 +#define PCA957X_TYPE 0x2000 static const struct i2c_device_id pca953x_id[] = { - { "pca9534", 8 | PCA953X_INT, }, - { "pca9535", 16 | PCA953X_INT, }, - { "pca9536", 4, }, - { "pca9537", 4 | PCA953X_INT, }, - { "pca9538", 8 | PCA953X_INT, }, - { "pca9539", 16 | PCA953X_INT, }, - { "pca9554", 8 | PCA953X_INT, }, - { "pca9555", 16 | PCA953X_INT, }, - { "pca9556", 8, }, - { "pca9557", 8, }, - - { "max7310", 8, }, - { "max7312", 16 | PCA953X_INT, }, - { "max7313", 16 | PCA953X_INT, }, - { "max7315", 8 | PCA953X_INT, }, - { "pca6107", 8 | PCA953X_INT, }, - { "tca6408", 8 | PCA953X_INT, }, - { "tca6416", 16 | PCA953X_INT, }, + { "pca9534", 8 | PCA953X_TYPE | PCA_INT, }, + { "pca9535", 16 | PCA953X_TYPE | PCA_INT, }, + { "pca9536", 4 | PCA953X_TYPE, }, + { "pca9537", 4 | PCA953X_TYPE | PCA_INT, }, + { "pca9538", 8 | PCA953X_TYPE | PCA_INT, }, + { "pca9539", 16 | PCA953X_TYPE | PCA_INT, }, + { "pca9554", 8 | PCA953X_TYPE | PCA_INT, }, + { "pca9555", 16 | PCA953X_TYPE | PCA_INT, }, + { "pca9556", 8 | PCA953X_TYPE, }, + { "pca9557", 8 | PCA953X_TYPE, }, + { "pca9574", 8 | PCA957X_TYPE | PCA_INT, }, + { "pca9575", 16 | PCA957X_TYPE | PCA_INT, }, + + { "max7310", 8 | PCA953X_TYPE, }, + { "max7312", 16 | PCA953X_TYPE | PCA_INT, }, + { "max7313", 16 | PCA953X_TYPE | PCA_INT, }, + { "max7315", 8 | PCA953X_TYPE | PCA_INT, }, + { "pca6107", 8 | PCA953X_TYPE | PCA_INT, }, + { "tca6408", 8 | PCA953X_TYPE | PCA_INT, }, + { "tca6416", 16 | PCA953X_TYPE | PCA_INT, }, /* NYET: { "tca6424", 24, }, */ { } }; @@ -75,16 +88,32 @@ struct pca953x_chip { struct pca953x_platform_data *dyn_pdata; struct gpio_chip gpio_chip; const char *const *names; + int chip_type; }; static int pca953x_write_reg(struct pca953x_chip *chip, int reg, uint16_t val) { - int ret; + int ret = 0; if (chip->gpio_chip.ngpio <= 8) ret = 
i2c_smbus_write_byte_data(chip->client, reg, val); - else - ret = i2c_smbus_write_word_data(chip->client, reg << 1, val); + else { + switch (chip->chip_type) { + case PCA953X_TYPE: + ret = i2c_smbus_write_word_data(chip->client, + reg << 1, val); + break; + case PCA957X_TYPE: + ret = i2c_smbus_write_byte_data(chip->client, reg << 1, + val & 0xff); + if (ret < 0) + break; + ret = i2c_smbus_write_byte_data(chip->client, + (reg << 1) + 1, + (val & 0xff00) >> 8); + break; + } + } if (ret < 0) { dev_err(&chip->client->dev, "failed writing register\n"); @@ -116,13 +145,22 @@ static int pca953x_gpio_direction_input(struct gpio_chip *gc, unsigned off) { struct pca953x_chip *chip; uint16_t reg_val; - int ret; + int ret, offset = 0; chip = container_of(gc, struct pca953x_chip, gpio_chip); mutex_lock(&chip->i2c_lock); reg_val = chip->reg_direction | (1u << off); - ret = pca953x_write_reg(chip, PCA953X_DIRECTION, reg_val); + + switch (chip->chip_type) { + case PCA953X_TYPE: + offset = PCA953X_DIRECTION; + break; + case PCA957X_TYPE: + offset = PCA957X_CFG; + break; + } + ret = pca953x_write_reg(chip, offset, reg_val); if (ret) goto exit; @@ -138,7 +176,7 @@ static int pca953x_gpio_direction_output(struct gpio_chip *gc, { struct pca953x_chip *chip; uint16_t reg_val; - int ret; + int ret, offset = 0; chip = container_of(gc, struct pca953x_chip, gpio_chip); @@ -149,7 +187,15 @@ static int pca953x_gpio_direction_output(struct gpio_chip *gc, else reg_val = chip->reg_output & ~(1u << off); - ret = pca953x_write_reg(chip, PCA953X_OUTPUT, reg_val); + switch (chip->chip_type) { + case PCA953X_TYPE: + offset = PCA953X_OUTPUT; + break; + case PCA957X_TYPE: + offset = PCA957X_OUT; + break; + } + ret = pca953x_write_reg(chip, offset, reg_val); if (ret) goto exit; @@ -157,7 +203,15 @@ static int pca953x_gpio_direction_output(struct gpio_chip *gc, /* then direction */ reg_val = chip->reg_direction & ~(1u << off); - ret = pca953x_write_reg(chip, PCA953X_DIRECTION, reg_val); + switch 
(chip->chip_type) { + case PCA953X_TYPE: + offset = PCA953X_DIRECTION; + break; + case PCA957X_TYPE: + offset = PCA957X_CFG; + break; + } + ret = pca953x_write_reg(chip, offset, reg_val); if (ret) goto exit; @@ -172,12 +226,20 @@ static int pca953x_gpio_get_value(struct gpio_chip *gc, unsigned off) { struct pca953x_chip *chip; uint16_t reg_val; - int ret; + int ret, offset = 0; chip = container_of(gc, struct pca953x_chip, gpio_chip); mutex_lock(&chip->i2c_lock); - ret = pca953x_read_reg(chip, PCA953X_INPUT, &reg_val); + switch (chip->chip_type) { + case PCA953X_TYPE: + offset = PCA953X_INPUT; + break; + case PCA957X_TYPE: + offset = PCA957X_IN; + break; + } + ret = pca953x_read_reg(chip, offset, &reg_val); mutex_unlock(&chip->i2c_lock); if (ret < 0) { /* NOTE: diagnostic already emitted; that's all we should @@ -194,7 +256,7 @@ static void pca953x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val) { struct pca953x_chip *chip; uint16_t reg_val; - int ret; + int ret, offset = 0; chip = container_of(gc, struct pca953x_chip, gpio_chip); @@ -204,7 +266,15 @@ static void pca953x_gpio_set_value(struct gpio_chip *gc, unsigned off, int val) else reg_val = chip->reg_output & ~(1u << off); - ret = pca953x_write_reg(chip, PCA953X_OUTPUT, reg_val); + switch (chip->chip_type) { + case PCA953X_TYPE: + offset = PCA953X_OUTPUT; + break; + case PCA957X_TYPE: + offset = PCA957X_OUT; + break; + } + ret = pca953x_write_reg(chip, offset, reg_val); if (ret) goto exit; @@ -322,9 +392,17 @@ static uint16_t pca953x_irq_pending(struct pca953x_chip *chip) uint16_t old_stat; uint16_t pending; uint16_t trigger; - int ret; - - ret = pca953x_read_reg(chip, PCA953X_INPUT, &cur_stat); + int ret, offset = 0; + + switch (chip->chip_type) { + case PCA953X_TYPE: + offset = PCA953X_INPUT; + break; + case PCA957X_TYPE: + offset = PCA957X_IN; + break; + } + ret = pca953x_read_reg(chip, offset, &cur_stat); if (ret) return 0; @@ -372,14 +450,21 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, { 
struct i2c_client *client = chip->client; struct pca953x_platform_data *pdata = client->dev.platform_data; - int ret; + int ret, offset = 0; if (pdata->irq_base != -1 - && (id->driver_data & PCA953X_INT)) { + && (id->driver_data & PCA_INT)) { int lvl; - ret = pca953x_read_reg(chip, PCA953X_INPUT, - &chip->irq_stat); + switch (chip->chip_type) { + case PCA953X_TYPE: + offset = PCA953X_INPUT; + break; + case PCA957X_TYPE: + offset = PCA957X_IN; + break; + } + ret = pca953x_read_reg(chip, offset, &chip->irq_stat); if (ret) goto out_failed; @@ -439,7 +524,7 @@ static int pca953x_irq_setup(struct pca953x_chip *chip, struct i2c_client *client = chip->client; struct pca953x_platform_data *pdata = client->dev.platform_data; - if (pdata->irq_base != -1 && (id->driver_data & PCA953X_INT)) + if (pdata->irq_base != -1 && (id->driver_data & PCA_INT)) dev_warn(&client->dev, "interrupt support not compiled in\n"); return 0; @@ -499,12 +584,65 @@ pca953x_get_alt_pdata(struct i2c_client *client) } #endif +static int __devinit device_pca953x_init(struct pca953x_chip *chip, int invert) +{ + int ret; + + ret = pca953x_read_reg(chip, PCA953X_OUTPUT, &chip->reg_output); + if (ret) + goto out; + + ret = pca953x_read_reg(chip, PCA953X_DIRECTION, + &chip->reg_direction); + if (ret) + goto out; + + /* set platform specific polarity inversion */ + ret = pca953x_write_reg(chip, PCA953X_INVERT, invert); + if (ret) + goto out; + return 0; +out: + return ret; +} + +static int __devinit device_pca957x_init(struct pca953x_chip *chip, int invert) +{ + int ret; + uint16_t val = 0; + + /* Let every port in proper state, that could save power */ + pca953x_write_reg(chip, PCA957X_PUPD, 0x0); + pca953x_write_reg(chip, PCA957X_CFG, 0xffff); + pca953x_write_reg(chip, PCA957X_OUT, 0x0); + + ret = pca953x_read_reg(chip, PCA957X_IN, &val); + if (ret) + goto out; + ret = pca953x_read_reg(chip, PCA957X_OUT, &chip->reg_output); + if (ret) + goto out; + ret = pca953x_read_reg(chip, PCA957X_CFG, 
&chip->reg_direction); + if (ret) + goto out; + + /* set platform specific polarity inversion */ + pca953x_write_reg(chip, PCA957X_INVRT, invert); + + /* To enable register 6, 7 to controll pull up and pull down */ + pca953x_write_reg(chip, PCA957X_BKEN, 0x202); + + return 0; +out: + return ret; +} + static int __devinit pca953x_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct pca953x_platform_data *pdata; struct pca953x_chip *chip; - int ret; + int ret = 0; chip = kzalloc(sizeof(struct pca953x_chip), GFP_KERNEL); if (chip == NULL) @@ -531,25 +669,20 @@ static int __devinit pca953x_probe(struct i2c_client *client, chip->gpio_start = pdata->gpio_base; chip->names = pdata->names; + chip->chip_type = id->driver_data & (PCA953X_TYPE | PCA957X_TYPE); mutex_init(&chip->i2c_lock); /* initialize cached registers from their original values. * we can't share this chip with another i2c master. */ - pca953x_setup_gpio(chip, id->driver_data & PCA953X_GPIOS); + pca953x_setup_gpio(chip, id->driver_data & PCA_GPIO_MASK); - ret = pca953x_read_reg(chip, PCA953X_OUTPUT, &chip->reg_output); - if (ret) - goto out_failed; - - ret = pca953x_read_reg(chip, PCA953X_DIRECTION, &chip->reg_direction); - if (ret) - goto out_failed; - - /* set platform specific polarity inversion */ - ret = pca953x_write_reg(chip, PCA953X_INVERT, pdata->invert); - if (ret) + if (chip->chip_type == PCA953X_TYPE) + device_pca953x_init(chip, pdata->invert); + else if (chip->chip_type == PCA957X_TYPE) + device_pca957x_init(chip, pdata->invert); + else goto out_failed; ret = pca953x_irq_setup(chip, id); diff --git a/drivers/gpio/pch_gpio.c b/drivers/gpio/pch_gpio.c index f970a5f3585..36919e77c49 100644 --- a/drivers/gpio/pch_gpio.c +++ b/drivers/gpio/pch_gpio.c @@ -283,8 +283,10 @@ static int pch_gpio_resume(struct pci_dev *pdev) #define pch_gpio_resume NULL #endif +#define PCI_VENDOR_ID_ROHM 0x10DB static DEFINE_PCI_DEVICE_TABLE(pch_gpio_pcidev_id) = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 
0x8803) }, + { PCI_DEVICE(PCI_VENDOR_ID_ROHM, 0x8014) }, { 0, } }; MODULE_DEVICE_TABLE(pci, pch_gpio_pcidev_id); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 6e5123b1d34..144d27261e4 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1782,7 +1782,6 @@ static int ide_cd_probe(ide_drive_t *drive) ide_cd_read_toc(drive, &sense); g->fops = &idecd_ops; g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - g->events = DISK_EVENT_MEDIA_CHANGE; add_disk(g); return 0; diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index f3698967edf..8755f5f3ad3 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -120,21 +120,17 @@ static void serport_ldisc_close(struct tty_struct *tty) * 'interrupt' routine. */ -static unsigned int serport_ldisc_receive(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void serport_ldisc_receive(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct serport *serport = (struct serport*) tty->disc_data; unsigned long flags; unsigned int ch_flags; - int ret = 0; int i; spin_lock_irqsave(&serport->lock, flags); - if (!test_bit(SERPORT_ACTIVE, &serport->flags)) { - ret = -EINVAL; + if (!test_bit(SERPORT_ACTIVE, &serport->flags)) goto out; - } for (i = 0; i < count; i++) { switch (fp[i]) { @@ -156,8 +152,6 @@ static unsigned int serport_ldisc_receive(struct tty_struct *tty, out: spin_unlock_irqrestore(&serport->lock, flags); - - return ret == 0 ? 
count : ret; } /* diff --git a/drivers/isdn/gigaset/ser-gigaset.c b/drivers/isdn/gigaset/ser-gigaset.c index 1d44d470897..86a5c4f7775 100644 --- a/drivers/isdn/gigaset/ser-gigaset.c +++ b/drivers/isdn/gigaset/ser-gigaset.c @@ -674,7 +674,7 @@ gigaset_tty_ioctl(struct tty_struct *tty, struct file *file, * cflags buffer containing error flags for received characters (ignored) * count number of received characters */ -static unsigned int +static void gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -683,12 +683,12 @@ gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, struct inbuf_t *inbuf; if (!cs) - return -ENODEV; + return; inbuf = cs->inbuf; if (!inbuf) { dev_err(cs->dev, "%s: no inbuf\n", __func__); cs_put(cs); - return -EINVAL; + return; } tail = inbuf->tail; @@ -725,8 +725,6 @@ gigaset_tty_receive(struct tty_struct *tty, const unsigned char *buf, gig_dbg(DEBUG_INTR, "%s-->BH", __func__); gigaset_schedule_event(cs); cs_put(cs); - - return count; } /* diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 76a5af00a26..2067288f61f 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -19,6 +19,8 @@ #define DM_MSG_PREFIX "io" #define DM_IO_MAX_REGIONS BITS_PER_LONG +#define MIN_IOS 16 +#define MIN_BIOS 16 struct dm_io_client { mempool_t *pool; @@ -41,33 +43,21 @@ struct io { static struct kmem_cache *_dm_io_cache; /* - * io contexts are only dynamically allocated for asynchronous - * io. Since async io is likely to be the majority of io we'll - * have the same number of io contexts as bios! (FIXME: must reduce this). - */ - -static unsigned int pages_to_ios(unsigned int pages) -{ - return 4 * pages; /* too many ? */ -} - -/* * Create a client with mempool and bioset. 
*/ -struct dm_io_client *dm_io_client_create(unsigned num_pages) +struct dm_io_client *dm_io_client_create(void) { - unsigned ios = pages_to_ios(num_pages); struct dm_io_client *client; client = kmalloc(sizeof(*client), GFP_KERNEL); if (!client) return ERR_PTR(-ENOMEM); - client->pool = mempool_create_slab_pool(ios, _dm_io_cache); + client->pool = mempool_create_slab_pool(MIN_IOS, _dm_io_cache); if (!client->pool) goto bad; - client->bios = bioset_create(16, 0); + client->bios = bioset_create(MIN_BIOS, 0); if (!client->bios) goto bad; @@ -81,13 +71,6 @@ struct dm_io_client *dm_io_client_create(unsigned num_pages) } EXPORT_SYMBOL(dm_io_client_create); -int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client) -{ - return mempool_resize(client->pool, pages_to_ios(num_pages), - GFP_KERNEL); -} -EXPORT_SYMBOL(dm_io_client_resize); - void dm_io_client_destroy(struct dm_io_client *client) { mempool_destroy(client->pool); diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 1bb73a13ca4..819e37eaaeb 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -27,15 +27,19 @@ #include "dm.h" +#define SUB_JOB_SIZE 128 +#define SPLIT_COUNT 8 +#define MIN_JOBS 8 +#define RESERVE_PAGES (DIV_ROUND_UP(SUB_JOB_SIZE << SECTOR_SHIFT, PAGE_SIZE)) + /*----------------------------------------------------------------- * Each kcopyd client has its own little pool of preallocated * pages for kcopyd io. *---------------------------------------------------------------*/ struct dm_kcopyd_client { - spinlock_t lock; struct page_list *pages; - unsigned int nr_pages; - unsigned int nr_free_pages; + unsigned nr_reserved_pages; + unsigned nr_free_pages; struct dm_io_client *io_client; @@ -67,15 +71,18 @@ static void wake(struct dm_kcopyd_client *kc) queue_work(kc->kcopyd_wq, &kc->kcopyd_work); } -static struct page_list *alloc_pl(void) +/* + * Obtain one page for the use of kcopyd. 
+ */ +static struct page_list *alloc_pl(gfp_t gfp) { struct page_list *pl; - pl = kmalloc(sizeof(*pl), GFP_KERNEL); + pl = kmalloc(sizeof(*pl), gfp); if (!pl) return NULL; - pl->page = alloc_page(GFP_KERNEL); + pl->page = alloc_page(gfp); if (!pl->page) { kfree(pl); return NULL; @@ -90,41 +97,56 @@ static void free_pl(struct page_list *pl) kfree(pl); } -static int kcopyd_get_pages(struct dm_kcopyd_client *kc, - unsigned int nr, struct page_list **pages) +/* + * Add the provided pages to a client's free page list, releasing + * back to the system any beyond the reserved_pages limit. + */ +static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) { - struct page_list *pl; - - spin_lock(&kc->lock); - if (kc->nr_free_pages < nr) { - spin_unlock(&kc->lock); - return -ENOMEM; - } - - kc->nr_free_pages -= nr; - for (*pages = pl = kc->pages; --nr; pl = pl->next) - ; + struct page_list *next; - kc->pages = pl->next; - pl->next = NULL; + do { + next = pl->next; - spin_unlock(&kc->lock); + if (kc->nr_free_pages >= kc->nr_reserved_pages) + free_pl(pl); + else { + pl->next = kc->pages; + kc->pages = pl; + kc->nr_free_pages++; + } - return 0; + pl = next; + } while (pl); } -static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) +static int kcopyd_get_pages(struct dm_kcopyd_client *kc, + unsigned int nr, struct page_list **pages) { - struct page_list *cursor; + struct page_list *pl; + + *pages = NULL; + + do { + pl = alloc_pl(__GFP_NOWARN | __GFP_NORETRY); + if (unlikely(!pl)) { + /* Use reserved pages */ + pl = kc->pages; + if (unlikely(!pl)) + goto out_of_memory; + kc->pages = pl->next; + kc->nr_free_pages--; + } + pl->next = *pages; + *pages = pl; + } while (--nr); - spin_lock(&kc->lock); - for (cursor = pl; cursor->next; cursor = cursor->next) - kc->nr_free_pages++; + return 0; - kc->nr_free_pages++; - cursor->next = kc->pages; - kc->pages = pl; - spin_unlock(&kc->lock); +out_of_memory: + if (*pages) + kcopyd_put_pages(kc, 
*pages); + return -ENOMEM; } /* @@ -141,13 +163,16 @@ static void drop_pages(struct page_list *pl) } } -static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr) +/* + * Allocate and reserve nr_pages for the use of a specific client. + */ +static int client_reserve_pages(struct dm_kcopyd_client *kc, unsigned nr_pages) { - unsigned int i; + unsigned i; struct page_list *pl = NULL, *next; - for (i = 0; i < nr; i++) { - next = alloc_pl(); + for (i = 0; i < nr_pages; i++) { + next = alloc_pl(GFP_KERNEL); if (!next) { if (pl) drop_pages(pl); @@ -157,17 +182,18 @@ static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr) pl = next; } + kc->nr_reserved_pages += nr_pages; kcopyd_put_pages(kc, pl); - kc->nr_pages += nr; + return 0; } static void client_free_pages(struct dm_kcopyd_client *kc) { - BUG_ON(kc->nr_free_pages != kc->nr_pages); + BUG_ON(kc->nr_free_pages != kc->nr_reserved_pages); drop_pages(kc->pages); kc->pages = NULL; - kc->nr_free_pages = kc->nr_pages = 0; + kc->nr_free_pages = kc->nr_reserved_pages = 0; } /*----------------------------------------------------------------- @@ -216,16 +242,17 @@ struct kcopyd_job { struct mutex lock; atomic_t sub_jobs; sector_t progress; -}; -/* FIXME: this should scale with the number of pages */ -#define MIN_JOBS 512 + struct kcopyd_job *master_job; +}; static struct kmem_cache *_job_cache; int __init dm_kcopyd_init(void) { - _job_cache = KMEM_CACHE(kcopyd_job, 0); + _job_cache = kmem_cache_create("kcopyd_job", + sizeof(struct kcopyd_job) * (SPLIT_COUNT + 1), + __alignof__(struct kcopyd_job), 0, NULL); if (!_job_cache) return -ENOMEM; @@ -299,7 +326,12 @@ static int run_complete_job(struct kcopyd_job *job) if (job->pages) kcopyd_put_pages(kc, job->pages); - mempool_free(job, kc->job_pool); + /* + * If this is the master job, the sub jobs have already + * completed so we can free everything. 
+ */ + if (job->master_job == job) + mempool_free(job, kc->job_pool); fn(read_err, write_err, context); if (atomic_dec_and_test(&kc->nr_jobs)) @@ -460,14 +492,14 @@ static void dispatch_job(struct kcopyd_job *job) wake(kc); } -#define SUB_JOB_SIZE 128 static void segment_complete(int read_err, unsigned long write_err, void *context) { /* FIXME: tidy this function */ sector_t progress = 0; sector_t count = 0; - struct kcopyd_job *job = (struct kcopyd_job *) context; + struct kcopyd_job *sub_job = (struct kcopyd_job *) context; + struct kcopyd_job *job = sub_job->master_job; struct dm_kcopyd_client *kc = job->kc; mutex_lock(&job->lock); @@ -498,8 +530,6 @@ static void segment_complete(int read_err, unsigned long write_err, if (count) { int i; - struct kcopyd_job *sub_job = mempool_alloc(kc->job_pool, - GFP_NOIO); *sub_job = *job; sub_job->source.sector += progress; @@ -511,7 +541,7 @@ static void segment_complete(int read_err, unsigned long write_err, } sub_job->fn = segment_complete; - sub_job->context = job; + sub_job->context = sub_job; dispatch_job(sub_job); } else if (atomic_dec_and_test(&job->sub_jobs)) { @@ -531,19 +561,19 @@ static void segment_complete(int read_err, unsigned long write_err, } /* - * Create some little jobs that will do the move between - * them. + * Create some sub jobs to share the work between them. 
*/ -#define SPLIT_COUNT 8 -static void split_job(struct kcopyd_job *job) +static void split_job(struct kcopyd_job *master_job) { int i; - atomic_inc(&job->kc->nr_jobs); + atomic_inc(&master_job->kc->nr_jobs); - atomic_set(&job->sub_jobs, SPLIT_COUNT); - for (i = 0; i < SPLIT_COUNT; i++) - segment_complete(0, 0u, job); + atomic_set(&master_job->sub_jobs, SPLIT_COUNT); + for (i = 0; i < SPLIT_COUNT; i++) { + master_job[i + 1].master_job = master_job; + segment_complete(0, 0u, &master_job[i + 1]); + } } int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, @@ -553,7 +583,8 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, struct kcopyd_job *job; /* - * Allocate a new job. + * Allocate an array of jobs consisting of one master job + * followed by SPLIT_COUNT sub jobs. */ job = mempool_alloc(kc->job_pool, GFP_NOIO); @@ -577,10 +608,10 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->fn = fn; job->context = context; + job->master_job = job; - if (job->source.count < SUB_JOB_SIZE) + if (job->source.count <= SUB_JOB_SIZE) dispatch_job(job); - else { mutex_init(&job->lock); job->progress = 0; @@ -606,17 +637,15 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Client setup *---------------------------------------------------------------*/ -int dm_kcopyd_client_create(unsigned int nr_pages, - struct dm_kcopyd_client **result) +struct dm_kcopyd_client *dm_kcopyd_client_create(void) { int r = -ENOMEM; struct dm_kcopyd_client *kc; kc = kmalloc(sizeof(*kc), GFP_KERNEL); if (!kc) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - spin_lock_init(&kc->lock); spin_lock_init(&kc->job_lock); INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); @@ -633,12 +662,12 @@ int dm_kcopyd_client_create(unsigned int nr_pages, goto bad_workqueue; kc->pages = NULL; - kc->nr_pages = kc->nr_free_pages = 0; - r = client_alloc_pages(kc, 
nr_pages); + kc->nr_reserved_pages = kc->nr_free_pages = 0; + r = client_reserve_pages(kc, RESERVE_PAGES); if (r) goto bad_client_pages; - kc->io_client = dm_io_client_create(nr_pages); + kc->io_client = dm_io_client_create(); if (IS_ERR(kc->io_client)) { r = PTR_ERR(kc->io_client); goto bad_io_client; @@ -647,8 +676,7 @@ int dm_kcopyd_client_create(unsigned int nr_pages, init_waitqueue_head(&kc->destroyq); atomic_set(&kc->nr_jobs, 0); - *result = kc; - return 0; + return kc; bad_io_client: client_free_pages(kc); @@ -659,7 +687,7 @@ bad_workqueue: bad_slab: kfree(kc); - return r; + return ERR_PTR(r); } EXPORT_SYMBOL(dm_kcopyd_client_create); diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index a1f32188967..948e3f4925b 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -449,8 +449,7 @@ static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, lc->io_req.mem.type = DM_IO_VMA; lc->io_req.notify.fn = NULL; - lc->io_req.client = dm_io_client_create(dm_div_up(buf_size, - PAGE_SIZE)); + lc->io_req.client = dm_io_client_create(); if (IS_ERR(lc->io_req.client)) { r = PTR_ERR(lc->io_req.client); DMWARN("couldn't allocate disk io client"); diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index a550a057d99..aa4e570c2cb 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1290,7 +1290,7 @@ static int do_end_io(struct multipath *m, struct request *clone, if (!error && !clone->errors) return 0; /* I/O complete */ - if (error == -EOPNOTSUPP || error == -EREMOTEIO) + if (error == -EOPNOTSUPP || error == -EREMOTEIO || error == -EILSEQ) return error; if (mpio->pgpath) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 976ad4688af..9bfd057be68 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -22,8 +22,6 @@ #define DM_MSG_PREFIX "raid1" #define MAX_RECOVERY 1 /* Maximum number of regions recovered in parallel. 
*/ -#define DM_IO_PAGES 64 -#define DM_KCOPYD_PAGES 64 #define DM_RAID1_HANDLE_ERRORS 0x01 #define errors_handled(p) ((p)->features & DM_RAID1_HANDLE_ERRORS) @@ -887,7 +885,7 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, return NULL; } - ms->io_client = dm_io_client_create(DM_IO_PAGES); + ms->io_client = dm_io_client_create(); if (IS_ERR(ms->io_client)) { ti->error = "Error creating dm_io client"; mempool_destroy(ms->read_record_pool); @@ -1117,9 +1115,11 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_destroy_wq; } - r = dm_kcopyd_client_create(DM_KCOPYD_PAGES, &ms->kcopyd_client); - if (r) + ms->kcopyd_client = dm_kcopyd_client_create(); + if (IS_ERR(ms->kcopyd_client)) { + r = PTR_ERR(ms->kcopyd_client); goto err_destroy_wq; + } wakeup_mirrord(ms); return 0; diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 95891dfcbca..135c2f1fdbf 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -154,11 +154,6 @@ struct pstore { struct workqueue_struct *metadata_wq; }; -static unsigned sectors_to_pages(unsigned sectors) -{ - return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); -} - static int alloc_area(struct pstore *ps) { int r = -ENOMEM; @@ -318,8 +313,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) chunk_size_supplied = 0; } - ps->io_client = dm_io_client_create(sectors_to_pages(ps->store-> - chunk_size)); + ps->io_client = dm_io_client_create(); if (IS_ERR(ps->io_client)) return PTR_ERR(ps->io_client); @@ -368,11 +362,6 @@ static int read_header(struct pstore *ps, int *new_snapshot) return r; } - r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size), - ps->io_client); - if (r) - return r; - r = alloc_area(ps); return r; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a2d330942cb..9ecff5f3023 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -40,11 +40,6 @@ static const char 
dm_snapshot_merge_target_name[] = "snapshot-merge"; #define SNAPSHOT_COPY_PRIORITY 2 /* - * Reserve 1MB for each snapshot initially (with minimum of 1 page). - */ -#define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) - -/* * The size of the mempool used to track chunks in use. */ #define MIN_IOS 256 @@ -1116,8 +1111,9 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); - if (r) { + s->kcopyd_client = dm_kcopyd_client_create(); + if (IS_ERR(s->kcopyd_client)) { + r = PTR_ERR(s->kcopyd_client); ti->error = "Could not create kcopyd client"; goto bad_kcopyd; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index cb8380c9767..451c3bb176d 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -362,6 +362,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { + struct request_queue *q; struct queue_limits *limits = data; struct block_device *bdev = dev->bdev; sector_t dev_size = @@ -370,6 +371,22 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, limits->logical_block_size >> SECTOR_SHIFT; char b[BDEVNAME_SIZE]; + /* + * Some devices exist without request functions, + * such as loop devices not yet bound to backing files. + * Forbid the use of such devices. + */ + q = bdev_get_queue(bdev); + if (!q || !q->make_request_fn) { + DMWARN("%s: %s is not yet initialised: " + "start=%llu, len=%llu, dev_size=%llu", + dm_device_name(ti->table->md), bdevname(bdev, b), + (unsigned long long)start, + (unsigned long long)len, + (unsigned long long)dev_size); + return 1; + } + if (!dev_size) return 0; @@ -1346,7 +1363,8 @@ bool dm_table_supports_discards(struct dm_table *t) return 0; /* - * Ensure that at least one underlying device supports discards. 
+ * Unless any target used by the table set discards_supported, + * require at least one underlying device to support discards. * t->devices includes internal dm devices such as mirror logs * so we need to use iterate_devices here, which targets * supporting discard must provide. @@ -1354,6 +1372,9 @@ bool dm_table_supports_discards(struct dm_table *t) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); + if (ti->discards_supported) + return 1; + if (ti->type->iterate_devices && ti->type->iterate_devices(ti, device_discard_capable, NULL)) return 1; diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig index b6c267724e1..0f09c057e79 100644 --- a/drivers/mfd/Kconfig +++ b/drivers/mfd/Kconfig @@ -721,7 +721,7 @@ config MFD_PM8XXX_IRQ config MFD_TPS65910 bool "TPS65910 Power Management chip" - depends on I2C=y + depends on I2C=y && GPIOLIB select MFD_CORE select GPIO_TPS65910 help diff --git a/drivers/mfd/db8500-prcmu.c b/drivers/mfd/db8500-prcmu.c index e63782107e2..02a15d7cb3b 100644 --- a/drivers/mfd/db8500-prcmu.c +++ b/drivers/mfd/db8500-prcmu.c @@ -2005,7 +2005,8 @@ static struct regulator_init_data db8500_regulators[DB8500_NUM_REGULATORS] = { static struct mfd_cell db8500_prcmu_devs[] = { { .name = "db8500-prcmu-regulators", - .mfd_data = &db8500_regulators, + .platform_data = &db8500_regulators, + .pdata_size = sizeof(db8500_regulators), }, { .name = "cpufreq-u8500", diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index b0c56313dbb..8cebec5e85e 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -304,7 +304,10 @@ static int check_and_rewind_pc(char *put_str, char *arg) return 1; } /* Readjust the instruction pointer if needed */ - instruction_pointer_set(&kgdbts_regs, ip + offset); + ip += offset; +#ifdef GDB_ADJUSTS_BREAK_OFFSET + instruction_pointer_set(&kgdbts_regs, ip); +#endif return 0; } diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 1a05fe08e2c..f91f82eabda 100644 --- 
a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -747,8 +747,8 @@ static void st_tty_close(struct tty_struct *tty) pr_debug("%s: done ", __func__); } -static unsigned int st_tty_receive(struct tty_struct *tty, - const unsigned char *data, char *tty_flags, int count) +static void st_tty_receive(struct tty_struct *tty, const unsigned char *data, + char *tty_flags, int count) { #ifdef VERBOSE print_hex_dump(KERN_DEBUG, ">in>", DUMP_PREFIX_NONE, @@ -761,8 +761,6 @@ static unsigned int st_tty_receive(struct tty_struct *tty, */ st_recv(tty->disc_data, data, count); pr_debug("done %s", __func__); - - return count; } /* wake-up function called in from the TTY layer diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c index 73c7e03617e..3df0c0f8b8b 100644 --- a/drivers/net/caif/caif_serial.c +++ b/drivers/net/caif/caif_serial.c @@ -167,8 +167,8 @@ static inline void debugfs_tx(struct ser_device *ser, const u8 *data, int size) #endif -static unsigned int ldisc_receive(struct tty_struct *tty, - const u8 *data, char *flags, int count) +static void ldisc_receive(struct tty_struct *tty, const u8 *data, + char *flags, int count) { struct sk_buff *skb = NULL; struct ser_device *ser; @@ -215,8 +215,6 @@ static unsigned int ldisc_receive(struct tty_struct *tty, } else ++ser->dev->stats.rx_dropped; update_tty_status(ser); - - return count; } static int handle_tx(struct ser_device *ser) diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c index 75622d54581..1b49df6b247 100644 --- a/drivers/net/can/slcan.c +++ b/drivers/net/can/slcan.c @@ -425,17 +425,16 @@ static void slc_setup(struct net_device *dev) * in parallel */ -static unsigned int slcan_receive_buf(struct tty_struct *tty, +static void slcan_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct slcan *sl = (struct slcan *) tty->disc_data; - int bytes = count; if (!sl || sl->magic != SLCAN_MAGIC || !netif_running(sl->dev)) - return 
-ENODEV; + return; /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -444,8 +443,6 @@ static unsigned int slcan_receive_buf(struct tty_struct *tty, } slcan_unesc(sl, *cp++); } - - return count; } /************************************ diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 992089639ea..3e5d0b6b651 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -456,7 +456,7 @@ out: * a block of 6pack data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. */ -static unsigned int sixpack_receive_buf(struct tty_struct *tty, +static void sixpack_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct sixpack *sp; @@ -464,11 +464,11 @@ static unsigned int sixpack_receive_buf(struct tty_struct *tty, int count1; if (!count) - return 0; + return; sp = sp_get(tty); if (!sp) - return -ENODEV; + return; memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); @@ -487,8 +487,6 @@ static unsigned int sixpack_receive_buf(struct tty_struct *tty, sp_put(sp); tty_unthrottle(tty); - - return count1; } /* diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 0e4f2353114..4c628393c8b 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -923,14 +923,13 @@ static long mkiss_compat_ioctl(struct tty_struct *tty, struct file *file, * a block of data has been received, which can now be decapsulated * and sent on to the AX.25 layer for further processing. 
*/ -static unsigned int mkiss_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void mkiss_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct mkiss *ax = mkiss_get(tty); - int bytes = count; if (!ax) - return -ENODEV; + return; /* * Argh! mtu change time! - costs us the packet part received @@ -940,7 +939,7 @@ static unsigned int mkiss_receive_buf(struct tty_struct *tty, ax_changedmtu(ax); /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp != NULL && *fp++) { if (!test_and_set_bit(AXF_ERROR, &ax->flags)) ax->dev->stats.rx_errors++; @@ -953,8 +952,6 @@ static unsigned int mkiss_receive_buf(struct tty_struct *tty, mkiss_put(ax); tty_unthrottle(tty); - - return count; } /* diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index 035861d8acb..3352b2443e5 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -216,23 +216,23 @@ static int irtty_do_write(struct sir_dev *dev, const unsigned char *ptr, size_t * usbserial: urb-complete-interrupt / softint */ -static unsigned int irtty_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void irtty_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct sir_dev *dev; struct sirtty_cb *priv = tty->disc_data; int i; - IRDA_ASSERT(priv != NULL, return -ENODEV;); - IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return -EINVAL;); + IRDA_ASSERT(priv != NULL, return;); + IRDA_ASSERT(priv->magic == IRTTY_MAGIC, return;); if (unlikely(count==0)) /* yes, this happens */ - return 0; + return; dev = priv->dev; if (!dev) { IRDA_WARNING("%s(), not ready yet!\n", __func__); - return -ENODEV; + return; } for (i = 0; i < count; i++) { @@ -242,13 +242,11 @@ static unsigned int irtty_receive_buf(struct tty_struct *tty, if (fp && *fp++) { IRDA_DEBUG(0, "Framing or parity error!\n"); sirdev_receive(dev, NULL, 
0); /* notify sir_dev (updating stats) */ - return -EINVAL; + return; } } sirdev_receive(dev, cp, count); - - return count; } /* diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index 53872d7d738..a1b82c9c67d 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -340,7 +340,7 @@ ppp_asynctty_poll(struct tty_struct *tty, struct file *file, poll_table *wait) } /* May sleep, don't call from interrupt level or with interrupts disabled */ -static unsigned int +static void ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -348,7 +348,7 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, unsigned long flags; if (!ap) - return -ENODEV; + return; spin_lock_irqsave(&ap->recv_lock, flags); ppp_async_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); @@ -356,8 +356,6 @@ ppp_asynctty_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); ap_put(ap); tty_unthrottle(tty); - - return count; } static void diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index 0815790a5cf..2573f525f11 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -381,7 +381,7 @@ ppp_sync_poll(struct tty_struct *tty, struct file *file, poll_table *wait) } /* May sleep, don't call from interrupt level or with interrupts disabled */ -static unsigned int +static void ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, char *cflags, int count) { @@ -389,7 +389,7 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, unsigned long flags; if (!ap) - return -ENODEV; + return; spin_lock_irqsave(&ap->recv_lock, flags); ppp_sync_input(ap, buf, cflags, count); spin_unlock_irqrestore(&ap->recv_lock, flags); @@ -397,8 +397,6 @@ ppp_sync_receive(struct tty_struct *tty, const unsigned char *buf, tasklet_schedule(&ap->tsk); sp_put(ap); tty_unthrottle(tty); - - return count; } static void diff --git 
a/drivers/net/slip.c b/drivers/net/slip.c index 584809c656d..8ec1a9a0bb9 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -670,17 +670,16 @@ static void sl_setup(struct net_device *dev) * in parallel */ -static unsigned int slip_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void slip_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct slip *sl = tty->disc_data; - int bytes = count; if (!sl || sl->magic != SLIP_MAGIC || !netif_running(sl->dev)) - return -ENODEV; + return; /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -694,8 +693,6 @@ static unsigned int slip_receive_buf(struct tty_struct *tty, #endif slip_unesc(sl, *cp++); } - - return count; } /************************************ diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0cb0b063267..f6853247a62 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -609,7 +609,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) * before it gets out of hand. Naturally, this wastes entries. */ if (capacity < 2+MAX_SKB_FRAGS) { netif_stop_queue(dev); - if (unlikely(!virtqueue_enable_cb(vi->svq))) { + if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) { /* More just got used, free them then recheck. */ capacity += free_old_xmit_skbs(vi); if (capacity >= 2+MAX_SKB_FRAGS) { diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index 40398bf7d03..24297b274cd 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -517,18 +517,17 @@ static int x25_asy_close(struct net_device *dev) * and sent on to some IP layer for further processing. 
*/ -static unsigned int x25_asy_receive_buf(struct tty_struct *tty, +static void x25_asy_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct x25_asy *sl = tty->disc_data; - int bytes = count; if (!sl || sl->magic != X25_ASY_MAGIC || !netif_running(sl->dev)) return; /* Read the characters out of the buffer */ - while (bytes--) { + while (count--) { if (fp && *fp++) { if (!test_and_set_bit(SLF_ERROR, &sl->flags)) sl->dev->stats.rx_errors++; @@ -537,8 +536,6 @@ static unsigned int x25_asy_receive_buf(struct tty_struct *tty, } x25_asy_unesc(sl, *cp++); } - - return count; } /* diff --git a/drivers/oprofile/event_buffer.h b/drivers/oprofile/event_buffer.h index 4e70749f8d1..a8d5bb3cba8 100644 --- a/drivers/oprofile/event_buffer.h +++ b/drivers/oprofile/event_buffer.h @@ -11,7 +11,7 @@ #define EVENT_BUFFER_H #include <linux/types.h> -#include <asm/mutex.h> +#include <linux/mutex.h> int alloc_event_buffer(void); diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index f9bda64fcd1..dccd8636095 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -14,7 +14,7 @@ #include <linux/moduleparam.h> #include <linux/workqueue.h> #include <linux/time.h> -#include <asm/mutex.h> +#include <linux/mutex.h> #include "oprof.h" #include "event_buffer.h" diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 12e02bf92c4..3dc9befa5ae 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -698,12 +698,7 @@ int __init detect_intel_iommu(void) { #ifdef CONFIG_INTR_REMAP struct acpi_table_dmar *dmar; - /* - * for now we will disable dma-remapping when interrupt - * remapping is enabled. - * When support for queued invalidation for IOTLB invalidation - * is added, we will not need this any more. 
- */ + dmar = (struct acpi_table_dmar *) dmar_tbl; if (ret && cpu_has_x2apic && dmar->flags & 0x1) printk(KERN_INFO diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 6af6b628175..59f17acf7f6 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -47,6 +47,8 @@ #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE +#define IS_BRIDGE_HOST_DEVICE(pdev) \ + ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST) #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) @@ -116,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level) return (pfn + level_size(level) - 1) & level_mask(level); } +static inline unsigned long lvl_to_nr_pages(unsigned int lvl) +{ + return 1 << ((lvl - 1) * LEVEL_STRIDE); +} + /* VT-d pages must always be _smaller_ than MM pages. Otherwise things are never going to work. 
*/ static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) @@ -143,6 +150,12 @@ static void __init check_tylersburg_isoch(void); static int rwbf_quirk; /* + * set to 1 to panic kernel if can't successfully enable VT-d + * (used when kernel is launched w/ TXT) + */ +static int force_on = 0; + +/* * 0: Present * 1-11: Reserved * 12-63: Context Ptr (12 - (haw-1)) @@ -338,6 +351,9 @@ struct dmar_domain { int iommu_coherency;/* indicate coherency of iommu access */ int iommu_snooping; /* indicate snooping control feature*/ int iommu_count; /* reference count of iommu */ + int iommu_superpage;/* Level of superpages supported: + 0 == 4KiB (no superpages), 1 == 2MiB, + 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ spinlock_t iommu_lock; /* protect iommu set in domain */ u64 max_addr; /* maximum mapped address */ }; @@ -387,6 +403,7 @@ int dmar_disabled = 1; static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; +static int intel_iommu_superpage = 1; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -417,6 +434,10 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable batched IOTLB flush\n"); intel_iommu_strict = 1; + } else if (!strncmp(str, "sp_off", 6)) { + printk(KERN_INFO + "Intel-IOMMU: disable supported super page\n"); + intel_iommu_superpage = 0; } str += strcspn(str, ","); @@ -555,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain) } } +static void domain_update_iommu_superpage(struct dmar_domain *domain) +{ + int i, mask = 0xf; + + if (!intel_iommu_superpage) { + domain->iommu_superpage = 0; + return; + } + + domain->iommu_superpage = 4; /* 1TiB */ + + for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { + mask |= cap_super_page_val(g_iommus[i]->cap); + if (!mask) { + break; + } + } + domain->iommu_superpage = fls(mask); +} + /* Some capabilities may be different across iommus */ static void 
domain_update_iommu_cap(struct dmar_domain *domain) { domain_update_iommu_coherency(domain); domain_update_iommu_snooping(domain); + domain_update_iommu_superpage(domain); } static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) @@ -689,23 +731,31 @@ out: } static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn) + unsigned long pfn, int large_level) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); - int offset; + int offset, target_level; BUG_ON(!domain->pgd); BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); parent = domain->pgd; + /* Search pte */ + if (!large_level) + target_level = 1; + else + target_level = large_level; + while (level > 0) { void *tmp_page; offset = pfn_level_offset(pfn, level); pte = &parent[offset]; - if (level == 1) + if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE)) + break; + if (level == target_level) break; if (!dma_pte_present(pte)) { @@ -733,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, return pte; } + /* return address's pte at specific level */ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, unsigned long pfn, - int level) + int level, int *large_page) { struct dma_pte *parent, *pte = NULL; int total = agaw_to_level(domain->agaw); @@ -749,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, if (level == total) return pte; - if (!dma_pte_present(pte)) + if (!dma_pte_present(pte)) { + *large_page = total; break; + } + + if (pte->val & DMA_PTE_LARGE_PAGE) { + *large_page = total; + return pte; + } + parent = phys_to_virt(dma_pte_addr(pte)); total--; } @@ -763,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + unsigned int large_page = 1; struct dma_pte *first_pte, *pte; BUG_ON(addr_width < 
BITS_PER_LONG && start_pfn >> addr_width); @@ -771,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain, /* we don't need lock here; nobody else touches the iova range */ do { - first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); + large_page = 1; + first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page); if (!pte) { - start_pfn = align_to_level(start_pfn + 1, 2); + start_pfn = align_to_level(start_pfn + 1, large_page + 1); continue; } - do { + do { dma_clear_pte(pte); - start_pfn++; + start_pfn += lvl_to_nr_pages(large_page); pte++; } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); @@ -798,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, int total = agaw_to_level(domain->agaw); int level; unsigned long tmp; + int large_page = 2; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); @@ -813,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, return; do { - first_pte = pte = dma_pfn_level_pte(domain, tmp, level); + large_page = level; + first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); + if (large_page > level) + level = large_page + 1; if (!pte) { tmp = align_to_level(tmp + 1, level + 1); continue; @@ -1397,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) else domain->iommu_snooping = 0; + domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); domain->iommu_count = 1; domain->nid = iommu->node; @@ -1417,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain) if (!domain) return; + /* Flush any lazy unmaps that may reference this domain */ + if (!intel_iommu_strict) + flush_unmaps_timeout(0); + domain_remove_dev_info(domain); /* destroy iovas */ put_iova_domain(&domain->iovad); @@ -1648,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr, return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; } +/* 
Return largest possible superpage level for a given mapping */ +static inline int hardware_largepage_caps(struct dmar_domain *domain, + unsigned long iov_pfn, + unsigned long phy_pfn, + unsigned long pages) +{ + int support, level = 1; + unsigned long pfnmerge; + + support = domain->iommu_superpage; + + /* To use a large page, the virtual *and* physical addresses + must be aligned to 2MiB/1GiB/etc. Lower bits set in either + of them will mean we have to use smaller pages. So just + merge them and check both at once. */ + pfnmerge = iov_pfn | phy_pfn; + + while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) { + pages >>= VTD_STRIDE_SHIFT; + if (!pages) + break; + pfnmerge >>= VTD_STRIDE_SHIFT; + level++; + support--; + } + return level; +} + static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct scatterlist *sg, unsigned long phys_pfn, unsigned long nr_pages, int prot) @@ -1656,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, phys_addr_t uninitialized_var(pteval); int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned long sg_res; + unsigned int largepage_lvl = 0; + unsigned long lvl_pages = 0; BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); @@ -1671,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; } - while (nr_pages--) { + while (nr_pages > 0) { uint64_t tmp; if (!sg_res) { @@ -1679,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; sg->dma_length = sg->length; pteval = page_to_phys(sg_page(sg)) | prot; + phys_pfn = pteval >> VTD_PAGE_SHIFT; } + if (!pte) { - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); + largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); + + first_pte = pte = pfn_to_dma_pte(domain, 
iov_pfn, largepage_lvl); if (!pte) return -ENOMEM; + /* It is large page*/ + if (largepage_lvl > 1) + pteval |= DMA_PTE_LARGE_PAGE; + else + pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; + } /* We don't need lock here, nobody else * touches the iova range @@ -1699,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, } WARN_ON(1); } + + lvl_pages = lvl_to_nr_pages(largepage_lvl); + + BUG_ON(nr_pages < lvl_pages); + BUG_ON(sg_res < lvl_pages); + + nr_pages -= lvl_pages; + iov_pfn += lvl_pages; + phys_pfn += lvl_pages; + pteval += lvl_pages * VTD_PAGE_SIZE; + sg_res -= lvl_pages; + + /* If the next PTE would be the first in a new page, then we + need to flush the cache on the entries we've just written. + And then we'll need to recalculate 'pte', so clear it and + let it get set again in the if (!pte) block above. + + If we're done (!nr_pages) we need to flush the cache too. + + Also if we've been setting superpages, we may need to + recalculate 'pte' and switch back to smaller pages for the + end of the mapping, if the trailing size is not enough to + use another superpage (i.e. sg_res < lvl_pages). 
*/ pte++; - if (!nr_pages || first_pte_in_page(pte)) { + if (!nr_pages || first_pte_in_page(pte) || + (largepage_lvl > 1 && sg_res < lvl_pages)) { domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); pte = NULL; } - iov_pfn++; - pteval += VTD_PAGE_SIZE; - sg_res--; - if (!sg_res) + + if (!sg_res && nr_pages) sg = sg_next(sg); } return 0; @@ -2016,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) return 0; return iommu_prepare_identity_map(pdev, rmrr->base_address, - rmrr->end_address + 1); + rmrr->end_address); } #ifdef CONFIG_DMAR_FLOPPY_WA @@ -2030,7 +2162,7 @@ static inline void iommu_prepare_isa(void) return; printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); - ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); + ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1); if (ret) printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " @@ -2106,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev) if (likely(!iommu_identity_mapping)) return 0; + info = pdev->dev.archdata.iommu; + if (info && info != DUMMY_DEVICE_DOMAIN_INFO) + return (info->domain == si_domain); - list_for_each_entry(info, &si_domain->devices, link) - if (info->dev == pdev) - return 1; return 0; } @@ -2187,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) * Assume that they will -- if they turn out not to be, then we can * take them out of the 1:1 domain later. */ - if (!startup) - return pdev->dma_mask > DMA_BIT_MASK(32); + if (!startup) { + /* + * If the device's dma_mask is less than the system's memory + * size then this is not a candidate for identity mapping. 
+ */ + u64 dma_mask = pdev->dma_mask; + + if (pdev->dev.coherent_dma_mask && + pdev->dev.coherent_dma_mask < dma_mask) + dma_mask = pdev->dev.coherent_dma_mask; + + return dma_mask >= dma_get_required_mask(&pdev->dev); + } return 1; } @@ -2203,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw) return -EFAULT; for_each_pci_dev(pdev) { + /* Skip Host/PCI Bridge devices */ + if (IS_BRIDGE_HOST_DEVICE(pdev)) + continue; if (iommu_should_identity_map(pdev, 1)) { printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", hw ? "hardware" : "software", pci_name(pdev)); @@ -2218,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw) return 0; } -static int __init init_dmars(int force_on) +static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; struct dmar_rmrr_unit *rmrr; @@ -2592,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); - iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), - pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask); if (!iova) goto error; @@ -3118,7 +3263,17 @@ static int init_iommu_hw(void) if (iommu->qi) dmar_reenable_qi(iommu); - for_each_active_iommu(iommu, drhd) { + for_each_iommu(iommu, drhd) { + if (drhd->ignored) { + /* + * we always have to disable PMRs or DMA may fail on + * this device + */ + if (force_on) + iommu_disable_protect_mem_regions(iommu); + continue; + } + iommu_flush_write_buffer(iommu); iommu_set_root_entry(iommu); @@ -3127,7 +3282,8 @@ static int init_iommu_hw(void) DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - iommu_enable_translation(iommu); + if (iommu_enable_translation(iommu)) + return 1; iommu_disable_protect_mem_regions(iommu); } @@ -3194,7 +3350,10 @@ static void iommu_resume(void) unsigned long flag; if (init_iommu_hw()) { - WARN(1, "IOMMU setup failed, DMAR can 
not resume!\n"); + if (force_on) + panic("tboot: IOMMU setup failed, DMAR can not resume!\n"); + else + WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); return; } @@ -3271,7 +3430,6 @@ static struct notifier_block device_nb = { int __init intel_iommu_init(void) { int ret = 0; - int force_on = 0; /* VT-d is required for a TXT/tboot launch, so enforce that */ force_on = tboot_force_iommu(); @@ -3309,7 +3467,7 @@ int __init intel_iommu_init(void) init_no_remapping_devices(); - ret = init_dmars(force_on); + ret = init_dmars(); if (ret) { if (force_on) panic("tboot: Failed to initialize DMARs\n"); @@ -3380,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_safe(entry, tmp, &domain->devices) { info = list_entry(entry, struct device_domain_info, link); - /* No need to compare PCI domain; it has to be the same */ - if (info->bus == pdev->bus->number && + if (info->segment == pci_domain_nr(pdev->bus) && + info->bus == pdev->bus->number && info->devfn == pdev->devfn) { list_del(&info->link); list_del(&info->global); @@ -3419,10 +3577,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, domain_update_iommu_cap(domain); spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); - spin_lock_irqsave(&iommu->lock, tmp_flags); - clear_bit(domain->id, iommu->domain_ids); - iommu->domains[domain->id] = NULL; - spin_unlock_irqrestore(&iommu->lock, tmp_flags); + if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && + !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) { + spin_lock_irqsave(&iommu->lock, tmp_flags); + clear_bit(domain->id, iommu->domain_ids); + iommu->domains[domain->id] = NULL; + spin_unlock_irqrestore(&iommu->lock, tmp_flags); + } } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -3505,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_count = 0; domain->iommu_coherency = 0; domain->iommu_snooping = 0; + 
domain->iommu_superpage = 0; domain->max_addr = 0; domain->nid = -1; @@ -3720,7 +3882,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, struct dma_pte *pte; u64 phys = 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0); if (pte) phys = dma_pte_addr(pte); diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 9606e599a47..c5c274ab5c5 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) curr = iovad->cached32_node; cached_iova = container_of(curr, struct iova, node); - if (free->pfn_lo >= cached_iova->pfn_lo) - iovad->cached32_node = rb_next(&free->node); + if (free->pfn_lo >= cached_iova->pfn_lo) { + struct rb_node *node = rb_next(&free->node); + struct iova *iova = container_of(node, struct iova, node); + + /* only cache if it's below 32bit pfn */ + if (node && iova->pfn_lo < iovad->dma_32bit_pfn) + iovad->cached32_node = node; + else + iovad->cached32_node = NULL; + } } /* Computes the padding size required, to make the diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 7c3b18e78ce..d36f41ea8cb 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -195,6 +195,8 @@ static pci_power_t acpi_pci_choose_state(struct pci_dev *pdev) return PCI_D2; case ACPI_STATE_D3: return PCI_D3hot; + case ACPI_STATE_D3_COLD: + return PCI_D3cold; } return PCI_POWER_ERROR; } diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 5cb999b50f9..45e0191c35d 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -39,7 +39,7 @@ config ACER_WMI config ACERHDF tristate "Acer Aspire One temperature and fan driver" - depends on THERMAL && THERMAL_HWMON && ACPI + depends on THERMAL && ACPI ---help--- This is a driver for Acer Aspire One netbooks. It allows to access the temperature sensor and to control the fan. 
@@ -760,4 +760,13 @@ config MXM_WMI MXM is a standard for laptop graphics cards, the WMI interface is required for switchable nvidia graphics machines +config INTEL_OAKTRAIL + tristate "Intel Oaktrail Platform Extras" + depends on ACPI + depends on RFKILL && BACKLIGHT_CLASS_DEVICE && ACPI + ---help--- + Intel Oaktrail platform need this driver to provide interfaces to + enable/disable the Camera, WiFi, BT etc. devices. If in doubt, say Y + here; it will only load on supported platforms. + endif # X86_PLATFORM_DEVICES diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index a7ab3bc7b3a..afc1f832aa6 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -41,5 +41,6 @@ obj-$(CONFIG_XO1_RFKILL) += xo1-rfkill.o obj-$(CONFIG_XO15_EBOOK) += xo15-ebook.o obj-$(CONFIG_IBM_RTL) += ibm_rtl.o obj-$(CONFIG_SAMSUNG_LAPTOP) += samsung-laptop.o -obj-$(CONFIG_INTEL_MFLD_THERMAL) += intel_mid_thermal.o obj-$(CONFIG_MXM_WMI) += mxm-wmi.o +obj-$(CONFIG_INTEL_MID_POWER_BUTTON) += intel_mid_powerbtn.o +obj-$(CONFIG_INTEL_OAKTRAIL) += intel_oaktrail.o diff --git a/drivers/platform/x86/acer-wmi.c b/drivers/platform/x86/acer-wmi.c index ac4e7f83ce6..005417bd429 100644 --- a/drivers/platform/x86/acer-wmi.c +++ b/drivers/platform/x86/acer-wmi.c @@ -98,13 +98,26 @@ enum acer_wmi_event_ids { static const struct key_entry acer_wmi_keymap[] = { {KE_KEY, 0x01, {KEY_WLAN} }, /* WiFi */ + {KE_KEY, 0x03, {KEY_WLAN} }, /* WiFi */ {KE_KEY, 0x12, {KEY_BLUETOOTH} }, /* BT */ {KE_KEY, 0x21, {KEY_PROG1} }, /* Backup */ {KE_KEY, 0x22, {KEY_PROG2} }, /* Arcade */ {KE_KEY, 0x23, {KEY_PROG3} }, /* P_Key */ {KE_KEY, 0x24, {KEY_PROG4} }, /* Social networking_Key */ + {KE_IGNORE, 0x41, {KEY_MUTE} }, + {KE_IGNORE, 0x42, {KEY_PREVIOUSSONG} }, + {KE_IGNORE, 0x43, {KEY_NEXTSONG} }, + {KE_IGNORE, 0x44, {KEY_PLAYPAUSE} }, + {KE_IGNORE, 0x45, {KEY_STOP} }, + {KE_IGNORE, 0x48, {KEY_VOLUMEUP} }, + {KE_IGNORE, 0x49, {KEY_VOLUMEDOWN} }, + {KE_IGNORE, 0x61, 
{KEY_SWITCHVIDEOMODE} }, + {KE_IGNORE, 0x62, {KEY_BRIGHTNESSUP} }, + {KE_IGNORE, 0x63, {KEY_BRIGHTNESSDOWN} }, {KE_KEY, 0x64, {KEY_SWITCHVIDEOMODE} }, /* Display Switch */ + {KE_IGNORE, 0x81, {KEY_SLEEP} }, {KE_KEY, 0x82, {KEY_TOUCHPAD_TOGGLE} }, /* Touch Pad On/Off */ + {KE_IGNORE, 0x83, {KEY_TOUCHPAD_TOGGLE} }, {KE_END, 0} }; @@ -122,6 +135,7 @@ struct event_return_value { */ #define ACER_WMID3_GDS_WIRELESS (1<<0) /* WiFi */ #define ACER_WMID3_GDS_THREEG (1<<6) /* 3G */ +#define ACER_WMID3_GDS_WIMAX (1<<7) /* WiMAX */ #define ACER_WMID3_GDS_BLUETOOTH (1<<11) /* BT */ struct lm_input_params { @@ -737,8 +751,11 @@ WMI_execute_u32(u32 method_id, u32 in, u32 *out) obj = (union acpi_object *) result.pointer; if (obj && obj->type == ACPI_TYPE_BUFFER && - obj->buffer.length == sizeof(u32)) { + (obj->buffer.length == sizeof(u32) || + obj->buffer.length == sizeof(u64))) { tmp = *((u32 *) obj->buffer.pointer); + } else if (obj->type == ACPI_TYPE_INTEGER) { + tmp = (u32) obj->integer.value; } else { tmp = 0; } @@ -866,8 +883,11 @@ static acpi_status WMID_set_capabilities(void) obj = (union acpi_object *) out.pointer; if (obj && obj->type == ACPI_TYPE_BUFFER && - obj->buffer.length == sizeof(u32)) { + (obj->buffer.length == sizeof(u32) || + obj->buffer.length == sizeof(u64))) { devices = *((u32 *) obj->buffer.pointer); + } else if (obj->type == ACPI_TYPE_INTEGER) { + devices = (u32) obj->integer.value; } else { kfree(out.pointer); return AE_ERROR; @@ -876,7 +896,8 @@ static acpi_status WMID_set_capabilities(void) dmi_walk(type_aa_dmi_decode, NULL); if (!has_type_aa) { interface->capability |= ACER_CAP_WIRELESS; - interface->capability |= ACER_CAP_THREEG; + if (devices & 0x40) + interface->capability |= ACER_CAP_THREEG; if (devices & 0x10) interface->capability |= ACER_CAP_BLUETOOTH; } @@ -961,10 +982,12 @@ static void __init acer_commandline_init(void) * These will all fail silently if the value given is invalid, or the * capability isn't available on the given interface */ 
- set_u32(mailled, ACER_CAP_MAILLED); - if (!has_type_aa) + if (mailled >= 0) + set_u32(mailled, ACER_CAP_MAILLED); + if (!has_type_aa && threeg >= 0) set_u32(threeg, ACER_CAP_THREEG); - set_u32(brightness, ACER_CAP_BRIGHTNESS); + if (brightness >= 0) + set_u32(brightness, ACER_CAP_BRIGHTNESS); } /* @@ -1081,7 +1104,7 @@ static acpi_status wmid3_get_device_status(u32 *value, u16 device) return AE_ERROR; } if (obj->buffer.length != 8) { - pr_warning("Unknown buffer length %d\n", obj->buffer.length); + pr_warn("Unknown buffer length %d\n", obj->buffer.length); kfree(obj); return AE_ERROR; } @@ -1090,8 +1113,8 @@ static acpi_status wmid3_get_device_status(u32 *value, u16 device) kfree(obj); if (return_value.error_code || return_value.ec_return_value) - pr_warning("Get Device Status failed: " - "0x%x - 0x%x\n", return_value.error_code, + pr_warn("Get Device Status failed: 0x%x - 0x%x\n", + return_value.error_code, return_value.ec_return_value); else *value = !!(return_value.devices & device); @@ -1124,6 +1147,114 @@ static acpi_status get_device_status(u32 *value, u32 cap) } } +static acpi_status wmid3_set_device_status(u32 value, u16 device) +{ + struct wmid3_gds_return_value return_value; + acpi_status status; + union acpi_object *obj; + u16 devices; + struct wmid3_gds_input_param params = { + .function_num = 0x1, + .hotkey_number = 0x01, + .devices = ACER_WMID3_GDS_WIRELESS & + ACER_WMID3_GDS_THREEG & + ACER_WMID3_GDS_WIMAX & + ACER_WMID3_GDS_BLUETOOTH, + }; + struct acpi_buffer input = { + sizeof(struct wmid3_gds_input_param), + &params + }; + struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_buffer output2 = { ACPI_ALLOCATE_BUFFER, NULL }; + + status = wmi_evaluate_method(WMID_GUID3, 0, 0x2, &input, &output); + if (ACPI_FAILURE(status)) + return status; + + obj = output.pointer; + + if (!obj) + return AE_ERROR; + else if (obj->type != ACPI_TYPE_BUFFER) { + kfree(obj); + return AE_ERROR; + } + if (obj->buffer.length != 8) { + pr_warning("Unknown
buffer length %d\n", obj->buffer.length); + kfree(obj); + return AE_ERROR; + } + + return_value = *((struct wmid3_gds_return_value *)obj->buffer.pointer); + kfree(obj); + + if (return_value.error_code || return_value.ec_return_value) { + pr_warning("Get Current Device Status failed: " + "0x%x - 0x%x\n", return_value.error_code, + return_value.ec_return_value); + return status; + } + + devices = return_value.devices; + params.function_num = 0x2; + params.hotkey_number = 0x01; + params.devices = (value) ? (devices | device) : (devices & ~device); + + status = wmi_evaluate_method(WMID_GUID3, 0, 0x1, &input, &output2); + if (ACPI_FAILURE(status)) + return status; + + obj = output2.pointer; + + if (!obj) + return AE_ERROR; + else if (obj->type != ACPI_TYPE_BUFFER) { + kfree(obj); + return AE_ERROR; + } + if (obj->buffer.length != 4) { + pr_warning("Unknown buffer length %d\n", obj->buffer.length); + kfree(obj); + return AE_ERROR; + } + + return_value = *((struct wmid3_gds_return_value *)obj->buffer.pointer); + kfree(obj); + + if (return_value.error_code || return_value.ec_return_value) + pr_warning("Set Device Status failed: " + "0x%x - 0x%x\n", return_value.error_code, + return_value.ec_return_value); + + return status; +} + +static acpi_status set_device_status(u32 value, u32 cap) +{ + if (wmi_has_guid(WMID_GUID3)) { + u16 device; + + switch (cap) { + case ACER_CAP_WIRELESS: + device = ACER_WMID3_GDS_WIRELESS; + break; + case ACER_CAP_BLUETOOTH: + device = ACER_WMID3_GDS_BLUETOOTH; + break; + case ACER_CAP_THREEG: + device = ACER_WMID3_GDS_THREEG; + break; + default: + return AE_ERROR; + } + return wmid3_set_device_status(value, device); + + } else { + return set_u32(value, cap); + } +} + /* * Rfkill devices */ @@ -1160,7 +1291,7 @@ static int acer_rfkill_set(void *data, bool blocked) u32 cap = (unsigned long)data; if (rfkill_inited) { - status = set_u32(!blocked, cap); + status = set_device_status(!blocked, cap); if (ACPI_FAILURE(status)) return -ENODEV; } @@ -1317,7 
+1448,7 @@ static void acer_wmi_notify(u32 value, void *context) status = wmi_get_event_data(value, &response); if (status != AE_OK) { - pr_warning("bad event status 0x%x\n", status); + pr_warn("bad event status 0x%x\n", status); return; } @@ -1326,12 +1457,12 @@ static void acer_wmi_notify(u32 value, void *context) if (!obj) return; if (obj->type != ACPI_TYPE_BUFFER) { - pr_warning("Unknown response received %d\n", obj->type); + pr_warn("Unknown response received %d\n", obj->type); kfree(obj); return; } if (obj->buffer.length != 8) { - pr_warning("Unknown buffer length %d\n", obj->buffer.length); + pr_warn("Unknown buffer length %d\n", obj->buffer.length); kfree(obj); return; } @@ -1343,7 +1474,7 @@ static void acer_wmi_notify(u32 value, void *context) case WMID_HOTKEY_EVENT: if (return_value.device_state) { u16 device_state = return_value.device_state; - pr_debug("deivces states: 0x%x\n", device_state); + pr_debug("device state: 0x%x\n", device_state); if (has_cap(ACER_CAP_WIRELESS)) rfkill_set_sw_state(wireless_rfkill, !(device_state & ACER_WMID3_GDS_WIRELESS)); @@ -1356,11 +1487,11 @@ static void acer_wmi_notify(u32 value, void *context) } if (!sparse_keymap_report_event(acer_wmi_input_dev, return_value.key_num, 1, true)) - pr_warning("Unknown key number - 0x%x\n", + pr_warn("Unknown key number - 0x%x\n", return_value.key_num); break; default: - pr_warning("Unknown function number - %d - %d\n", + pr_warn("Unknown function number - %d - %d\n", return_value.function, return_value.key_num); break; } @@ -1389,7 +1520,7 @@ wmid3_set_lm_mode(struct lm_input_params *params, return AE_ERROR; } if (obj->buffer.length != 4) { - pr_warning("Unknown buffer length %d\n", obj->buffer.length); + pr_warn("Unknown buffer length %d\n", obj->buffer.length); kfree(obj); return AE_ERROR; } @@ -1414,11 +1545,11 @@ static int acer_wmi_enable_ec_raw(void) status = wmid3_set_lm_mode(&params, &return_value); if (return_value.error_code || return_value.ec_return_value) - pr_warning("Enabling
EC raw mode failed: " - "0x%x - 0x%x\n", return_value.error_code, - return_value.ec_return_value); + pr_warn("Enabling EC raw mode failed: 0x%x - 0x%x\n", + return_value.error_code, + return_value.ec_return_value); else - pr_info("Enabled EC raw mode"); + pr_info("Enabled EC raw mode\n"); return status; } @@ -1437,9 +1568,9 @@ static int acer_wmi_enable_lm(void) status = wmid3_set_lm_mode(&params, &return_value); if (return_value.error_code || return_value.ec_return_value) - pr_warning("Enabling Launch Manager failed: " - "0x%x - 0x%x\n", return_value.error_code, - return_value.ec_return_value); + pr_warn("Enabling Launch Manager failed: 0x%x - 0x%x\n", + return_value.error_code, + return_value.ec_return_value); return status; } @@ -1506,8 +1637,11 @@ static u32 get_wmid_devices(void) obj = (union acpi_object *) out.pointer; if (obj && obj->type == ACPI_TYPE_BUFFER && - obj->buffer.length == sizeof(u32)) { + (obj->buffer.length == sizeof(u32) || + obj->buffer.length == sizeof(u64))) { devices = *((u32 *) obj->buffer.pointer); + } else if (obj->type == ACPI_TYPE_INTEGER) { + devices = (u32) obj->integer.value; } kfree(out.pointer); diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index 60f9cfcac93..fca3489218b 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -35,10 +35,8 @@ #include <linux/kernel.h> #include <linux/module.h> -#include <linux/fs.h> #include <linux/dmi.h> -#include <acpi/acpi_drivers.h> -#include <linux/sched.h> +#include <linux/acpi.h> #include <linux/thermal.h> #include <linux/platform_device.h> diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c index c53b3ff7978..d65df92e2ac 100644 --- a/drivers/platform/x86/asus-laptop.c +++ b/drivers/platform/x86/asus-laptop.c @@ -318,7 +318,7 @@ static int acpi_check_handle(acpi_handle handle, const char *method, if (status != AE_OK) { if (ret) - pr_warning("Error finding %s\n", method); + pr_warn("Error finding %s\n",
method); return -ENODEV; } return 0; @@ -383,7 +383,7 @@ static int asus_kled_lvl(struct asus_laptop *asus) rv = acpi_evaluate_integer(asus->handle, METHOD_KBD_LIGHT_GET, &params, &kblv); if (ACPI_FAILURE(rv)) { - pr_warning("Error reading kled level\n"); + pr_warn("Error reading kled level\n"); return -ENODEV; } return kblv; @@ -397,7 +397,7 @@ static int asus_kled_set(struct asus_laptop *asus, int kblv) kblv = 0; if (write_acpi_int(asus->handle, METHOD_KBD_LIGHT_SET, kblv)) { - pr_warning("Keyboard LED display write failed\n"); + pr_warn("Keyboard LED display write failed\n"); return -EINVAL; } return 0; @@ -531,7 +531,7 @@ static int asus_read_brightness(struct backlight_device *bd) rv = acpi_evaluate_integer(asus->handle, METHOD_BRIGHTNESS_GET, NULL, &value); if (ACPI_FAILURE(rv)) - pr_warning("Error reading brightness\n"); + pr_warn("Error reading brightness\n"); return value; } @@ -541,7 +541,7 @@ static int asus_set_brightness(struct backlight_device *bd, int value) struct asus_laptop *asus = bl_get_data(bd); if (write_acpi_int(asus->handle, METHOD_BRIGHTNESS_SET, value)) { - pr_warning("Error changing brightness\n"); + pr_warn("Error changing brightness\n"); return -EIO; } return 0; @@ -730,7 +730,7 @@ static ssize_t store_ledd(struct device *dev, struct device_attribute *attr, rv = parse_arg(buf, count, &value); if (rv > 0) { if (write_acpi_int(asus->handle, METHOD_LEDD, value)) { - pr_warning("LED display write failed\n"); + pr_warn("LED display write failed\n"); return -ENODEV; } asus->ledd_status = (u32) value; @@ -752,7 +752,7 @@ static int asus_wireless_status(struct asus_laptop *asus, int mask) rv = acpi_evaluate_integer(asus->handle, METHOD_WL_STATUS, NULL, &status); if (ACPI_FAILURE(rv)) { - pr_warning("Error reading Wireless status\n"); + pr_warn("Error reading Wireless status\n"); return -EINVAL; } return !!(status & mask); @@ -764,7 +764,7 @@ static int asus_wireless_status(struct asus_laptop *asus, int mask) static int asus_wlan_set(struct
asus_laptop *asus, int status) { if (write_acpi_int(asus->handle, METHOD_WLAN, !!status)) { - pr_warning("Error setting wlan status to %d", status); + pr_warn("Error setting wlan status to %d\n", status); return -EIO; } return 0; @@ -792,7 +792,7 @@ static ssize_t store_wlan(struct device *dev, struct device_attribute *attr, static int asus_bluetooth_set(struct asus_laptop *asus, int status) { if (write_acpi_int(asus->handle, METHOD_BLUETOOTH, !!status)) { - pr_warning("Error setting bluetooth status to %d", status); + pr_warn("Error setting bluetooth status to %d\n", status); return -EIO; } return 0; @@ -821,7 +821,7 @@ static ssize_t store_bluetooth(struct device *dev, static int asus_wimax_set(struct asus_laptop *asus, int status) { if (write_acpi_int(asus->handle, METHOD_WIMAX, !!status)) { - pr_warning("Error setting wimax status to %d", status); + pr_warn("Error setting wimax status to %d\n", status); return -EIO; } return 0; @@ -850,7 +850,7 @@ static ssize_t store_wimax(struct device *dev, static int asus_wwan_set(struct asus_laptop *asus, int status) { if (write_acpi_int(asus->handle, METHOD_WWAN, !!status)) { - pr_warning("Error setting wwan status to %d", status); + pr_warn("Error setting wwan status to %d\n", status); return -EIO; } return 0; @@ -880,7 +880,7 @@ static void asus_set_display(struct asus_laptop *asus, int value) { /* no sanity check needed for now */ if (write_acpi_int(asus->handle, METHOD_SWITCH_DISPLAY, value)) - pr_warning("Error setting display\n"); + pr_warn("Error setting display\n"); return; } @@ -909,7 +909,7 @@ static ssize_t store_disp(struct device *dev, struct device_attribute *attr, static void asus_als_switch(struct asus_laptop *asus, int value) { if (write_acpi_int(asus->handle, METHOD_ALS_CONTROL, value)) - pr_warning("Error setting light sensor switch\n"); + pr_warn("Error setting light sensor switch\n"); asus->light_switch = value; } @@ -937,7 +937,7 @@ static ssize_t store_lssw(struct device *dev, struct 
device_attribute *attr, static void asus_als_level(struct asus_laptop *asus, int value) { if (write_acpi_int(asus->handle, METHOD_ALS_LEVEL, value)) - pr_warning("Error setting light sensor level\n"); + pr_warn("Error setting light sensor level\n"); asus->light_level = value; } @@ -976,7 +976,7 @@ static int asus_gps_status(struct asus_laptop *asus) rv = acpi_evaluate_integer(asus->handle, METHOD_GPS_STATUS, NULL, &status); if (ACPI_FAILURE(rv)) { - pr_warning("Error reading GPS status\n"); + pr_warn("Error reading GPS status\n"); return -ENODEV; } return !!status; @@ -1284,7 +1284,7 @@ static int asus_laptop_get_info(struct asus_laptop *asus) */ status = acpi_get_table(ACPI_SIG_DSDT, 1, &asus->dsdt_info); if (ACPI_FAILURE(status)) - pr_warning("Couldn't get the DSDT table header\n"); + pr_warn("Couldn't get the DSDT table header\n"); /* We have to write 0 on init this far for all ASUS models */ if (write_acpi_int_ret(asus->handle, "INIT", 0, &buffer)) { @@ -1296,7 +1296,7 @@ static int asus_laptop_get_info(struct asus_laptop *asus) status = acpi_evaluate_integer(asus->handle, "BSTS", NULL, &bsts_result); if (ACPI_FAILURE(status)) - pr_warning("Error calling BSTS\n"); + pr_warn("Error calling BSTS\n"); else if (bsts_result) pr_notice("BSTS called, 0x%02x returned\n", (uint) bsts_result); diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index 832a3fd7c1c..00460cb9587 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -425,7 +425,7 @@ static void asus_rfkill_hotplug(struct asus_wmi *asus) if (asus->hotplug_slot) { bus = pci_find_bus(0, 1); if (!bus) { - pr_warning("Unable to find PCI bus 1?\n"); + pr_warn("Unable to find PCI bus 1?\n"); goto out_unlock; } @@ -436,12 +436,12 @@ static void asus_rfkill_hotplug(struct asus_wmi *asus) absent = (l == 0xffffffff); if (blocked != absent) { - pr_warning("BIOS says wireless lan is %s, " - "but the pci device is %s\n", - blocked ? 
"blocked" : "unblocked", - absent ? "absent" : "present"); - pr_warning("skipped wireless hotplug as probably " - "inappropriate for this model\n"); + pr_warn("BIOS says wireless lan is %s, " + "but the pci device is %s\n", + blocked ? "blocked" : "unblocked", + absent ? "absent" : "present"); + pr_warn("skipped wireless hotplug as probably " + "inappropriate for this model\n"); goto out_unlock; } @@ -500,7 +500,7 @@ static int asus_register_rfkill_notifier(struct asus_wmi *asus, char *node) ACPI_SYSTEM_NOTIFY, asus_rfkill_notify, asus); if (ACPI_FAILURE(status)) - pr_warning("Failed to register notify on %s\n", node); + pr_warn("Failed to register notify on %s\n", node); } else return -ENODEV; @@ -1223,7 +1223,7 @@ static int asus_wmi_sysfs_init(struct platform_device *device) /* * Platform device */ -static int __init asus_wmi_platform_init(struct asus_wmi *asus) +static int asus_wmi_platform_init(struct asus_wmi *asus) { int rv; @@ -1583,12 +1583,12 @@ static int asus_wmi_probe(struct platform_device *pdev) int ret; if (!wmi_has_guid(ASUS_WMI_MGMT_GUID)) { - pr_warning("Management GUID not found\n"); + pr_warn("Management GUID not found\n"); return -ENODEV; } if (wdrv->event_guid && !wmi_has_guid(wdrv->event_guid)) { - pr_warning("Event GUID not found\n"); + pr_warn("Event GUID not found\n"); return -ENODEV; } diff --git a/drivers/platform/x86/asus_acpi.c b/drivers/platform/x86/asus_acpi.c index f503607c064..d9312b3073e 100644 --- a/drivers/platform/x86/asus_acpi.c +++ b/drivers/platform/x86/asus_acpi.c @@ -30,6 +30,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> @@ -581,8 +583,7 @@ static int read_led(const char *ledname, int ledmask) if (read_acpi_int(NULL, ledname, &led_status)) return led_status; else - printk(KERN_WARNING "Asus ACPI: Error reading LED " - "status\n"); + pr_warn("Error reading LED status\n"); } return (hotk->status & ledmask) ? 
1 : 0; } @@ -621,8 +622,7 @@ write_led(const char __user *buffer, unsigned long count, led_out = !led_out; if (!write_acpi_int(hotk->handle, ledname, led_out, NULL)) - printk(KERN_WARNING "Asus ACPI: LED (%s) write failed\n", - ledname); + pr_warn("LED (%s) write failed\n", ledname); return rv; } @@ -679,8 +679,7 @@ static ssize_t ledd_proc_write(struct file *file, const char __user *buffer, if (rv > 0) { if (!write_acpi_int (hotk->handle, hotk->methods->mt_ledd, value, NULL)) - printk(KERN_WARNING - "Asus ACPI: LED display write failed\n"); + pr_warn("LED display write failed\n"); else hotk->ledd_status = (u32) value; } @@ -838,8 +837,7 @@ static int get_lcd_state(void) } else { /* We don't have to check anything if we are here */ if (!read_acpi_int(NULL, hotk->methods->lcd_status, &lcd)) - printk(KERN_WARNING - "Asus ACPI: Error reading LCD status\n"); + pr_warn("Error reading LCD status\n"); if (hotk->model == L2D) lcd = ~lcd; @@ -871,7 +869,7 @@ static int set_lcd_state(int value) the exact behaviour is simulated here */ } if (ACPI_FAILURE(status)) - printk(KERN_WARNING "Asus ACPI: Error switching LCD\n"); + pr_warn("Error switching LCD\n"); } return 0; @@ -915,13 +913,11 @@ static int read_brightness(struct backlight_device *bd) if (hotk->methods->brightness_get) { /* SPLV/GPLV laptop */ if (!read_acpi_int(hotk->handle, hotk->methods->brightness_get, &value)) - printk(KERN_WARNING - "Asus ACPI: Error reading brightness\n"); + pr_warn("Error reading brightness\n"); } else if (hotk->methods->brightness_status) { /* For D1 for example */ if (!read_acpi_int(NULL, hotk->methods->brightness_status, &value)) - printk(KERN_WARNING - "Asus ACPI: Error reading brightness\n"); + pr_warn("Error reading brightness\n"); } else /* No GPLV method */ value = hotk->brightness; return value; @@ -939,8 +935,7 @@ static int set_brightness(int value) if (hotk->methods->brightness_set) { if (!write_acpi_int(hotk->handle, hotk->methods->brightness_set, value, NULL)) { - 
printk(KERN_WARNING - "Asus ACPI: Error changing brightness\n"); + pr_warn("Error changing brightness\n"); ret = -EIO; } goto out; @@ -955,8 +950,7 @@ static int set_brightness(int value) NULL, NULL); (value > 0) ? value-- : value++; if (ACPI_FAILURE(status)) { - printk(KERN_WARNING - "Asus ACPI: Error changing brightness\n"); + pr_warn("Error changing brightness\n"); ret = -EIO; } } @@ -1008,7 +1002,7 @@ static void set_display(int value) /* no sanity check needed for now */ if (!write_acpi_int(hotk->handle, hotk->methods->display_set, value, NULL)) - printk(KERN_WARNING "Asus ACPI: Error setting display\n"); + pr_warn("Error setting display\n"); return; } @@ -1021,8 +1015,7 @@ static int disp_proc_show(struct seq_file *m, void *v) int value = 0; if (!read_acpi_int(hotk->handle, hotk->methods->display_get, &value)) - printk(KERN_WARNING - "Asus ACPI: Error reading display status\n"); + pr_warn("Error reading display status\n"); value &= 0x07; /* needed for some models, shouldn't hurt others */ seq_printf(m, "%d\n", value); return 0; @@ -1068,7 +1061,7 @@ asus_proc_add(char *name, const struct file_operations *proc_fops, mode_t mode, proc = proc_create_data(name, mode, acpi_device_dir(device), proc_fops, acpi_driver_data(device)); if (!proc) { - printk(KERN_WARNING " Unable to create %s fs entry\n", name); + pr_warn(" Unable to create %s fs entry\n", name); return -1; } proc->uid = asus_uid; @@ -1085,8 +1078,8 @@ static int asus_hotk_add_fs(struct acpi_device *device) mode = S_IFREG | S_IRUGO | S_IWUSR | S_IWGRP; } else { mode = S_IFREG | S_IRUSR | S_IRGRP | S_IWUSR | S_IWGRP; - printk(KERN_WARNING " asus_uid and asus_gid parameters are " - "deprecated, use chown and chmod instead!\n"); + pr_warn(" asus_uid and asus_gid parameters are " + "deprecated, use chown and chmod instead!\n"); } acpi_device_dir(device) = asus_proc_dir; @@ -1099,8 +1092,7 @@ static int asus_hotk_add_fs(struct acpi_device *device) proc->uid = asus_uid; proc->gid = asus_gid; } else { - 
printk(KERN_WARNING " Unable to create " PROC_INFO - " fs entry\n"); + pr_warn(" Unable to create " PROC_INFO " fs entry\n"); } if (hotk->methods->mt_wled) { @@ -1283,20 +1275,19 @@ static int asus_hotk_get_info(void) */ status = acpi_get_table(ACPI_SIG_DSDT, 1, &asus_info); if (ACPI_FAILURE(status)) - printk(KERN_WARNING " Couldn't get the DSDT table header\n"); + pr_warn(" Couldn't get the DSDT table header\n"); /* We have to write 0 on init this far for all ASUS models */ if (!write_acpi_int(hotk->handle, "INIT", 0, &buffer)) { - printk(KERN_ERR " Hotkey initialization failed\n"); + pr_err(" Hotkey initialization failed\n"); return -ENODEV; } /* This needs to be called for some laptops to init properly */ if (!read_acpi_int(hotk->handle, "BSTS", &bsts_result)) - printk(KERN_WARNING " Error calling BSTS\n"); + pr_warn(" Error calling BSTS\n"); else if (bsts_result) - printk(KERN_NOTICE " BSTS called, 0x%02x returned\n", - bsts_result); + pr_notice(" BSTS called, 0x%02x returned\n", bsts_result); /* * Try to match the object returned by INIT to the specific model. 
@@ -1324,23 +1315,21 @@ static int asus_hotk_get_info(void) if (asus_info && strncmp(asus_info->oem_table_id, "ODEM", 4) == 0) { hotk->model = P30; - printk(KERN_NOTICE - " Samsung P30 detected, supported\n"); + pr_notice(" Samsung P30 detected, supported\n"); hotk->methods = &model_conf[hotk->model]; kfree(model); return 0; } else { hotk->model = M2E; - printk(KERN_NOTICE " unsupported model %s, trying " - "default values\n", string); - printk(KERN_NOTICE - " send /proc/acpi/dsdt to the developers\n"); + pr_notice(" unsupported model %s, trying default values\n", + string); + pr_notice(" send /proc/acpi/dsdt to the developers\n"); kfree(model); return -ENODEV; } } hotk->methods = &model_conf[hotk->model]; - printk(KERN_NOTICE " %s model detected, supported\n", string); + pr_notice(" %s model detected, supported\n", string); /* Sort of per-model blacklist */ if (strncmp(string, "L2B", 3) == 0) @@ -1385,7 +1374,7 @@ static int asus_hotk_check(void) if (hotk->device->status.present) { result = asus_hotk_get_info(); } else { - printk(KERN_ERR " Hotkey device not present, aborting\n"); + pr_err(" Hotkey device not present, aborting\n"); return -EINVAL; } @@ -1399,8 +1388,7 @@ static int asus_hotk_add(struct acpi_device *device) acpi_status status = AE_OK; int result; - printk(KERN_NOTICE "Asus Laptop ACPI Extras version %s\n", - ASUS_ACPI_VERSION); + pr_notice("Asus Laptop ACPI Extras version %s\n", ASUS_ACPI_VERSION); hotk = kzalloc(sizeof(struct asus_hotk), GFP_KERNEL); if (!hotk) @@ -1428,15 +1416,14 @@ static int asus_hotk_add(struct acpi_device *device) acpi_evaluate_object(NULL, hotk->methods->brightness_down, NULL, NULL); if (ACPI_FAILURE(status)) - printk(KERN_WARNING " Error changing brightness\n"); + pr_warn(" Error changing brightness\n"); else { status = acpi_evaluate_object(NULL, hotk->methods->brightness_up, NULL, NULL); if (ACPI_FAILURE(status)) - printk(KERN_WARNING " Strange, error changing" - " brightness\n"); + pr_warn(" Strange, error changing 
brightness\n"); } } @@ -1488,7 +1475,7 @@ static int __init asus_acpi_init(void) asus_proc_dir = proc_mkdir(PROC_ASUS, acpi_root_dir); if (!asus_proc_dir) { - printk(KERN_ERR "Asus ACPI: Unable to create /proc entry\n"); + pr_err("Unable to create /proc entry\n"); acpi_bus_unregister_driver(&asus_hotk_driver); return -ENODEV; } @@ -1513,7 +1500,7 @@ static int __init asus_acpi_init(void) &asus_backlight_data, &props); if (IS_ERR(asus_backlight_device)) { - printk(KERN_ERR "Could not register asus backlight device\n"); + pr_err("Could not register asus backlight device\n"); asus_backlight_device = NULL; asus_acpi_exit(); return -ENODEV; diff --git a/drivers/platform/x86/compal-laptop.c b/drivers/platform/x86/compal-laptop.c index c16a27641ce..3f204fde1b0 100644 --- a/drivers/platform/x86/compal-laptop.c +++ b/drivers/platform/x86/compal-laptop.c @@ -68,6 +68,8 @@ * only enabled on a JHL90 board until it is verified that they work on the * other boards too. See the extra_features variable. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -200,8 +202,8 @@ static bool extra_features; * watching the output of address 0x4F (do an ec_transaction writing 0x33 * into 0x4F and read a few bytes from the output, like so: * u8 writeData = 0x33; - * ec_transaction(0x4F, &writeData, 1, buffer, 32, 0); - * That address is labelled "fan1 table information" in the service manual. + * ec_transaction(0x4F, &writeData, 1, buffer, 32); + * That address is labeled "fan1 table information" in the service manual. * It should be clear which value in 'buffer' changes). This seems to be * related to fan speed. It isn't a proper 'realtime' fan speed value * though, because physically stopping or speeding up the fan doesn't @@ -286,7 +288,7 @@ static int get_backlight_level(void) static void set_backlight_state(bool on) { u8 data = on ? 
BACKLIGHT_STATE_ON_DATA : BACKLIGHT_STATE_OFF_DATA; - ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0, 0); + ec_transaction(BACKLIGHT_STATE_ADDR, &data, 1, NULL, 0); } @@ -294,24 +296,24 @@ static void set_backlight_state(bool on) static void pwm_enable_control(void) { unsigned char writeData = PWM_ENABLE_DATA; - ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0, 0); + ec_transaction(PWM_ENABLE_ADDR, &writeData, 1, NULL, 0); } static void pwm_disable_control(void) { unsigned char writeData = PWM_DISABLE_DATA; - ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0, 0); + ec_transaction(PWM_DISABLE_ADDR, &writeData, 1, NULL, 0); } static void set_pwm(int pwm) { - ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0, 0); + ec_transaction(PWM_ADDRESS, &pwm_lookup_table[pwm], 1, NULL, 0); } static int get_fan_rpm(void) { u8 value, data = FAN_DATA; - ec_transaction(FAN_ADDRESS, &data, 1, &value, 1, 0); + ec_transaction(FAN_ADDRESS, &data, 1, &value, 1); return 100 * (int)value; } @@ -760,16 +762,14 @@ static struct rfkill *bt_rfkill; static int dmi_check_cb(const struct dmi_system_id *id) { - printk(KERN_INFO DRIVER_NAME": Identified laptop model '%s'\n", - id->ident); + pr_info("Identified laptop model '%s'\n", id->ident); extra_features = false; return 1; } static int dmi_check_cb_extra(const struct dmi_system_id *id) { - printk(KERN_INFO DRIVER_NAME": Identified laptop model '%s', " - "enabling extra features\n", + pr_info("Identified laptop model '%s', enabling extra features\n", id->ident); extra_features = true; return 1; @@ -956,14 +956,12 @@ static int __init compal_init(void) int ret; if (acpi_disabled) { - printk(KERN_ERR DRIVER_NAME": ACPI needs to be enabled for " - "this driver to work!\n"); + pr_err("ACPI needs to be enabled for this driver to work!\n"); return -ENODEV; } if (!force && !dmi_check_system(compal_dmi_table)) { - printk(KERN_ERR DRIVER_NAME": Motherboard not recognized (You " - "could try the module's 
force-parameter)"); + pr_err("Motherboard not recognized (You could try the module's force-parameter)\n"); return -ENODEV; } @@ -998,8 +996,7 @@ static int __init compal_init(void) if (ret) goto err_rfkill; - printk(KERN_INFO DRIVER_NAME": Driver "DRIVER_VERSION - " successfully loaded\n"); + pr_info("Driver " DRIVER_VERSION " successfully loaded\n"); return 0; err_rfkill: @@ -1064,7 +1061,7 @@ static void __exit compal_cleanup(void) rfkill_destroy(wifi_rfkill); rfkill_destroy(bt_rfkill); - printk(KERN_INFO DRIVER_NAME": Driver unloaded\n"); + pr_info("Driver unloaded\n"); } static int __devexit compal_remove(struct platform_device *pdev) @@ -1074,8 +1071,7 @@ static int __devexit compal_remove(struct platform_device *pdev) if (!extra_features) return 0; - printk(KERN_INFO DRIVER_NAME": Unloading: resetting fan control " - "to motherboard\n"); + pr_info("Unloading: resetting fan control to motherboard\n"); pwm_disable_control(); data = platform_get_drvdata(pdev); diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c index de301aa8e5c..d3841de6a8c 100644 --- a/drivers/platform/x86/dell-laptop.c +++ b/drivers/platform/x86/dell-laptop.c @@ -11,6 +11,8 @@ * published by the Free Software Foundation. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -434,8 +436,7 @@ static int __init dell_setup_rfkill(void) int ret; if (dmi_check_system(dell_blacklist)) { - printk(KERN_INFO "dell-laptop: Blacklisted hardware detected - " - "not enabling rfkill\n"); + pr_info("Blacklisted hardware detected - not enabling rfkill\n"); return 0; } @@ -606,7 +607,7 @@ static int __init dell_init(void) dmi_walk(find_tokens, NULL); if (!da_tokens) { - printk(KERN_INFO "dell-laptop: Unable to find dmi tokens\n"); + pr_info("Unable to find dmi tokens\n"); return -ENODEV; } @@ -636,14 +637,13 @@ static int __init dell_init(void) ret = dell_setup_rfkill(); if (ret) { - printk(KERN_WARNING "dell-laptop: Unable to setup rfkill\n"); + pr_warn("Unable to setup rfkill\n"); goto fail_rfkill; } ret = i8042_install_filter(dell_laptop_i8042_filter); if (ret) { - printk(KERN_WARNING - "dell-laptop: Unable to install key filter\n"); + pr_warn("Unable to install key filter\n"); goto fail_filter; } diff --git a/drivers/platform/x86/dell-wmi-aio.c b/drivers/platform/x86/dell-wmi-aio.c index 0ed84573ae1..3f945457f71 100644 --- a/drivers/platform/x86/dell-wmi-aio.c +++ b/drivers/platform/x86/dell-wmi-aio.c @@ -15,6 +15,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> @@ -138,7 +139,7 @@ static int __init dell_wmi_aio_init(void) guid = dell_wmi_aio_find(); if (!guid) { - pr_warning("No known WMI GUID found\n"); + pr_warn("No known WMI GUID found\n"); return -ENXIO; } diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c index 77f1d55414c..ce790827e19 100644 --- a/drivers/platform/x86/dell-wmi.c +++ b/drivers/platform/x86/dell-wmi.c @@ -23,6 +23,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define 
pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -141,7 +143,7 @@ static void dell_wmi_notify(u32 value, void *context) status = wmi_get_event_data(value, &response); if (status != AE_OK) { - printk(KERN_INFO "dell-wmi: bad event status 0x%x\n", status); + pr_info("bad event status 0x%x\n", status); return; } @@ -153,8 +155,8 @@ static void dell_wmi_notify(u32 value, void *context) u16 *buffer_entry = (u16 *)obj->buffer.pointer; if (dell_new_hk_type && (buffer_entry[1] != 0x10)) { - printk(KERN_INFO "dell-wmi: Received unknown WMI event" - " (0x%x)\n", buffer_entry[1]); + pr_info("Received unknown WMI event (0x%x)\n", + buffer_entry[1]); kfree(obj); return; } @@ -167,8 +169,7 @@ static void dell_wmi_notify(u32 value, void *context) key = sparse_keymap_entry_from_scancode(dell_wmi_input_dev, reported_key); if (!key) { - printk(KERN_INFO "dell-wmi: Unknown key %x pressed\n", - reported_key); + pr_info("Unknown key %x pressed\n", reported_key); } else if ((key->keycode == KEY_BRIGHTNESSUP || key->keycode == KEY_BRIGHTNESSDOWN) && acpi_video) { /* Don't report brightness notifications that will also @@ -275,7 +276,7 @@ static int __init dell_wmi_init(void) acpi_status status; if (!wmi_has_guid(DELL_EVENT_GUID)) { - printk(KERN_WARNING "dell-wmi: No known WMI GUID found\n"); + pr_warn("No known WMI GUID found\n"); return -ENODEV; } @@ -290,9 +291,7 @@ static int __init dell_wmi_init(void) dell_wmi_notify, NULL); if (ACPI_FAILURE(status)) { dell_wmi_input_destroy(); - printk(KERN_ERR - "dell-wmi: Unable to register notify handler - %d\n", - status); + pr_err("Unable to register notify handler - %d\n", status); return -ENODEV; } diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 2c1abf63957..1c45d92e216 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -228,7 +228,7 @@ static int set_acpi(struct eeepc_laptop *eeepc, int 
cm, int value) return -ENODEV; if (write_acpi_int(eeepc->handle, method, value)) - pr_warning("Error writing %s\n", method); + pr_warn("Error writing %s\n", method); return 0; } @@ -243,7 +243,7 @@ static int get_acpi(struct eeepc_laptop *eeepc, int cm) return -ENODEV; if (read_acpi_int(eeepc->handle, method, &value)) - pr_warning("Error reading %s\n", method); + pr_warn("Error reading %s\n", method); return value; } @@ -261,7 +261,7 @@ static int acpi_setter_handle(struct eeepc_laptop *eeepc, int cm, status = acpi_get_handle(eeepc->handle, (char *)method, handle); if (status != AE_OK) { - pr_warning("Error finding %s\n", method); + pr_warn("Error finding %s\n", method); return -ENODEV; } return 0; @@ -417,7 +417,7 @@ static ssize_t store_cpufv_disabled(struct device *dev, switch (value) { case 0: if (eeepc->cpufv_disabled) - pr_warning("cpufv enabled (not officially supported " + pr_warn("cpufv enabled (not officially supported " "on this model)\n"); eeepc->cpufv_disabled = false; return rv; @@ -609,7 +609,7 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc, acpi_handle handle) bus = port->subordinate; if (!bus) { - pr_warning("Unable to find PCI bus?\n"); + pr_warn("Unable to find PCI bus 1?\n"); goto out_unlock; } @@ -621,12 +621,12 @@ static void eeepc_rfkill_hotplug(struct eeepc_laptop *eeepc, acpi_handle handle) absent = (l == 0xffffffff); if (blocked != absent) { - pr_warning("BIOS says wireless lan is %s, " - "but the pci device is %s\n", + pr_warn("BIOS says wireless lan is %s, " + "but the pci device is %s\n", blocked ? "blocked" : "unblocked", absent ? 
"absent" : "present"); - pr_warning("skipped wireless hotplug as probably " - "inappropriate for this model\n"); + pr_warn("skipped wireless hotplug as probably " + "inappropriate for this model\n"); goto out_unlock; } @@ -691,7 +691,8 @@ static int eeepc_register_rfkill_notifier(struct eeepc_laptop *eeepc, eeepc_rfkill_notify, eeepc); if (ACPI_FAILURE(status)) - pr_warning("Failed to register notify on %s\n", node); + pr_warn("Failed to register notify on %s\n", node); + /* * Refresh pci hotplug in case the rfkill state was * changed during setup. diff --git a/drivers/platform/x86/eeepc-wmi.c b/drivers/platform/x86/eeepc-wmi.c index 649dcadd8ea..4aa867a9b88 100644 --- a/drivers/platform/x86/eeepc-wmi.c +++ b/drivers/platform/x86/eeepc-wmi.c @@ -84,7 +84,7 @@ static const struct key_entry eeepc_wmi_keymap[] = { static acpi_status eeepc_wmi_parse_device(acpi_handle handle, u32 level, void *context, void **retval) { - pr_warning("Found legacy ATKD device (%s)", EEEPC_ACPI_HID); + pr_warn("Found legacy ATKD device (%s)\n", EEEPC_ACPI_HID); *(bool *)context = true; return AE_CTRL_TERMINATE; } @@ -105,12 +105,12 @@ static int eeepc_wmi_check_atkd(void) static int eeepc_wmi_probe(struct platform_device *pdev) { if (eeepc_wmi_check_atkd()) { - pr_warning("WMI device present, but legacy ATKD device is also " - "present and enabled."); - pr_warning("You probably booted with acpi_osi=\"Linux\" or " - "acpi_osi=\"!Windows 2009\""); - pr_warning("Can't load eeepc-wmi, use default acpi_osi " - "(preferred) or eeepc-laptop"); + pr_warn("WMI device present, but legacy ATKD device is also " + "present and enabled\n"); + pr_warn("You probably booted with acpi_osi=\"Linux\" or " + "acpi_osi=\"!Windows 2009\"\n"); + pr_warn("Can't load eeepc-wmi, use default acpi_osi " + "(preferred) or eeepc-laptop\n"); return -EBUSY; } return 0; diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c index 493054c2dbe..6b26666b37f 100644 --- 
a/drivers/platform/x86/fujitsu-laptop.c +++ b/drivers/platform/x86/fujitsu-laptop.c @@ -56,6 +56,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -585,8 +587,7 @@ static struct platform_driver fujitsupf_driver = { static void dmi_check_cb_common(const struct dmi_system_id *id) { acpi_handle handle; - printk(KERN_INFO "fujitsu-laptop: Identified laptop model '%s'.\n", - id->ident); + pr_info("Identified laptop model '%s'\n", id->ident); if (use_alt_lcd_levels == -1) { if (ACPI_SUCCESS(acpi_get_handle(NULL, "\\_SB.PCI0.LPCB.FJEX.SBL2", &handle))) @@ -691,11 +692,11 @@ static int acpi_fujitsu_add(struct acpi_device *device) result = acpi_bus_update_power(fujitsu->acpi_handle, &state); if (result) { - printk(KERN_ERR "Error reading power state\n"); + pr_err("Error reading power state\n"); goto err_unregister_input_dev; } - printk(KERN_INFO "ACPI: %s [%s] (%s)\n", + pr_info("ACPI: %s [%s] (%s)\n", acpi_device_name(device), acpi_device_bid(device), !device->power.state ? 
"on" : "off"); @@ -707,7 +708,7 @@ static int acpi_fujitsu_add(struct acpi_device *device) if (ACPI_FAILURE (acpi_evaluate_object (device->handle, METHOD_NAME__INI, NULL, NULL))) - printk(KERN_ERR "_INI Method failed\n"); + pr_err("_INI Method failed\n"); } /* do config (detect defaults) */ @@ -827,7 +828,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) error = kfifo_alloc(&fujitsu_hotkey->fifo, RINGBUFFERSIZE * sizeof(int), GFP_KERNEL); if (error) { - printk(KERN_ERR "kfifo_alloc failed\n"); + pr_err("kfifo_alloc failed\n"); goto err_stop; } @@ -859,13 +860,13 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) result = acpi_bus_update_power(fujitsu_hotkey->acpi_handle, &state); if (result) { - printk(KERN_ERR "Error reading power state\n"); + pr_err("Error reading power state\n"); goto err_unregister_input_dev; } - printk(KERN_INFO "ACPI: %s [%s] (%s)\n", - acpi_device_name(device), acpi_device_bid(device), - !device->power.state ? "on" : "off"); + pr_info("ACPI: %s [%s] (%s)\n", + acpi_device_name(device), acpi_device_bid(device), + !device->power.state ? 
"on" : "off"); fujitsu_hotkey->dev = device; @@ -875,7 +876,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if (ACPI_FAILURE (acpi_evaluate_object (device->handle, METHOD_NAME__INI, NULL, NULL))) - printk(KERN_ERR "_INI Method failed\n"); + pr_err("_INI Method failed\n"); } i = 0; @@ -897,8 +898,7 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) call_fext_func(FUNC_RFKILL, 0x4, 0x0, 0x0); /* Suspect this is a keymap of the application panel, print it */ - printk(KERN_INFO "fujitsu-laptop: BTNI: [0x%x]\n", - call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0)); + pr_info("BTNI: [0x%x]\n", call_fext_func(FUNC_BUTTONS, 0x0, 0x0, 0x0)); #if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE) if (call_fext_func(FUNC_LEDS, 0x0, 0x0, 0x0) & LOGOLAMP_POWERON) { @@ -907,8 +907,8 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if (result == 0) { fujitsu_hotkey->logolamp_registered = 1; } else { - printk(KERN_ERR "fujitsu-laptop: Could not register " - "LED handler for logo lamp, error %i\n", result); + pr_err("Could not register LED handler for logo lamp, error %i\n", + result); } } @@ -919,8 +919,8 @@ static int acpi_fujitsu_hotkey_add(struct acpi_device *device) if (result == 0) { fujitsu_hotkey->kblamps_registered = 1; } else { - printk(KERN_ERR "fujitsu-laptop: Could not register " - "LED handler for keyboard lamps, error %i\n", result); + pr_err("Could not register LED handler for keyboard lamps, error %i\n", + result); } } #endif @@ -1169,8 +1169,7 @@ static int __init fujitsu_init(void) fujitsu->bl_device->props.power = 0; } - printk(KERN_INFO "fujitsu-laptop: driver " FUJITSU_DRIVER_VERSION - " successfully loaded.\n"); + pr_info("driver " FUJITSU_DRIVER_VERSION " successfully loaded\n"); return 0; @@ -1216,7 +1215,7 @@ static void __exit fujitsu_cleanup(void) kfree(fujitsu); - printk(KERN_INFO "fujitsu-laptop: driver unloaded.\n"); + pr_info("driver unloaded\n"); } module_init(fujitsu_init); diff --git 
a/drivers/platform/x86/hdaps.c b/drivers/platform/x86/hdaps.c index 067bf36d32f..5a34973dc16 100644 --- a/drivers/platform/x86/hdaps.c +++ b/drivers/platform/x86/hdaps.c @@ -26,6 +26,8 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/delay.h> #include <linux/platform_device.h> #include <linux/input-polldev.h> @@ -238,7 +240,7 @@ static int hdaps_device_init(void) __check_latch(0x1611, 0x01)) goto out; - printk(KERN_DEBUG "hdaps: initial latch check good (0x%02x).\n", + printk(KERN_DEBUG "hdaps: initial latch check good (0x%02x)\n", __get_latch(0x1611)); outb(0x17, 0x1610); @@ -299,7 +301,7 @@ static int hdaps_probe(struct platform_device *dev) if (ret) return ret; - printk(KERN_INFO "hdaps: device successfully initialized.\n"); + pr_info("device successfully initialized\n"); return 0; } @@ -480,7 +482,7 @@ static struct attribute_group hdaps_attribute_group = { /* hdaps_dmi_match - found a match. return one, short-circuiting the hunt. 
*/ static int __init hdaps_dmi_match(const struct dmi_system_id *id) { - printk(KERN_INFO "hdaps: %s detected.\n", id->ident); + pr_info("%s detected\n", id->ident); return 1; } @@ -488,8 +490,7 @@ static int __init hdaps_dmi_match(const struct dmi_system_id *id) static int __init hdaps_dmi_match_invert(const struct dmi_system_id *id) { hdaps_invert = (unsigned long)id->driver_data; - printk(KERN_INFO "hdaps: inverting axis (%u) readings.\n", - hdaps_invert); + pr_info("inverting axis (%u) readings\n", hdaps_invert); return hdaps_dmi_match(id); } @@ -543,7 +544,7 @@ static int __init hdaps_init(void) int ret; if (!dmi_check_system(hdaps_whitelist)) { - printk(KERN_WARNING "hdaps: supported laptop not found!\n"); + pr_warn("supported laptop not found!\n"); ret = -ENODEV; goto out; } @@ -595,7 +596,7 @@ static int __init hdaps_init(void) if (ret) goto out_idev; - printk(KERN_INFO "hdaps: driver successfully loaded.\n"); + pr_info("driver successfully loaded\n"); return 0; out_idev: @@ -609,7 +610,7 @@ out_driver: out_region: release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS); out: - printk(KERN_WARNING "hdaps: driver init failed (ret=%d)!\n", ret); + pr_warn("driver init failed (ret=%d)!\n", ret); return ret; } @@ -622,7 +623,7 @@ static void __exit hdaps_exit(void) platform_driver_unregister(&hdaps_driver); release_region(HDAPS_LOW_PORT, HDAPS_NR_PORTS); - printk(KERN_INFO "hdaps: driver unloaded.\n"); + pr_info("driver unloaded\n"); } module_init(hdaps_init); diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c index 1bc4a7539ba..f94017bcdd6 100644 --- a/drivers/platform/x86/hp-wmi.c +++ b/drivers/platform/x86/hp-wmi.c @@ -24,6 +24,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -54,9 +56,6 @@ MODULE_ALIAS("wmi:5FB7F034-2C63-45e9-BE91-3D44E2C707E4"); #define HPWMI_HOTKEY_QUERY 0xc #define 
HPWMI_WIRELESS2_QUERY 0x1b -#define PREFIX "HP WMI: " -#define UNIMP "Unimplemented " - enum hp_wmi_radio { HPWMI_WIFI = 0, HPWMI_BLUETOOTH = 1, @@ -228,9 +227,8 @@ static int hp_wmi_perform_query(int query, int write, void *buffer, if (bios_return->return_code) { if (bios_return->return_code != HPWMI_RET_UNKNOWN_CMDTYPE) - printk(KERN_WARNING PREFIX "query 0x%x returned " - "error 0x%x\n", - query, bios_return->return_code); + pr_warn("query 0x%x returned error 0x%x\n", + query, bios_return->return_code); kfree(obj); return bios_return->return_code; } @@ -384,8 +382,7 @@ static int hp_wmi_rfkill2_refresh(void) if (num >= state.count || devstate->rfkill_id != rfkill2[i].id) { - printk(KERN_WARNING PREFIX "power configuration of " - "the wireless devices unexpectedly changed\n"); + pr_warn("power configuration of the wireless devices unexpectedly changed\n"); continue; } @@ -471,7 +468,7 @@ static void hp_wmi_notify(u32 value, void *context) status = wmi_get_event_data(value, &response); if (status != AE_OK) { - printk(KERN_INFO PREFIX "bad event status 0x%x\n", status); + pr_info("bad event status 0x%x\n", status); return; } @@ -480,8 +477,7 @@ static void hp_wmi_notify(u32 value, void *context) if (!obj) return; if (obj->type != ACPI_TYPE_BUFFER) { - printk(KERN_INFO "hp-wmi: Unknown response received %d\n", - obj->type); + pr_info("Unknown response received %d\n", obj->type); kfree(obj); return; } @@ -498,8 +494,7 @@ static void hp_wmi_notify(u32 value, void *context) event_id = *location; event_data = *(location + 2); } else { - printk(KERN_INFO "hp-wmi: Unknown buffer length %d\n", - obj->buffer.length); + pr_info("Unknown buffer length %d\n", obj->buffer.length); kfree(obj); return; } @@ -527,8 +522,7 @@ static void hp_wmi_notify(u32 value, void *context) if (!sparse_keymap_report_event(hp_wmi_input_dev, key_code, 1, true)) - printk(KERN_INFO PREFIX "Unknown key code - 0x%x\n", - key_code); + pr_info("Unknown key code - 0x%x\n", key_code); break; case 
HPWMI_WIRELESS: if (rfkill2_count) { @@ -550,14 +544,12 @@ static void hp_wmi_notify(u32 value, void *context) hp_wmi_get_hw_state(HPWMI_WWAN)); break; case HPWMI_CPU_BATTERY_THROTTLE: - printk(KERN_INFO PREFIX UNIMP "CPU throttle because of 3 Cell" - " battery event detected\n"); + pr_info("Unimplemented CPU throttle because of 3 Cell battery event detected\n"); break; case HPWMI_LOCK_SWITCH: break; default: - printk(KERN_INFO PREFIX "Unknown event_id - %d - 0x%x\n", - event_id, event_data); + pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data); break; } } @@ -705,7 +697,7 @@ static int __devinit hp_wmi_rfkill2_setup(struct platform_device *device) return err; if (state.count > HPWMI_MAX_RFKILL2_DEVICES) { - printk(KERN_WARNING PREFIX "unable to parse 0x1b query output\n"); + pr_warn("unable to parse 0x1b query output\n"); return -EINVAL; } @@ -727,14 +719,14 @@ static int __devinit hp_wmi_rfkill2_setup(struct platform_device *device) name = "hp-wwan"; break; default: - printk(KERN_WARNING PREFIX "unknown device type 0x%x\n", - state.device[i].radio_type); + pr_warn("unknown device type 0x%x\n", + state.device[i].radio_type); continue; } if (!state.device[i].vendor_id) { - printk(KERN_WARNING PREFIX "zero device %d while %d " - "reported\n", i, state.count); + pr_warn("zero device %d while %d reported\n", + i, state.count); continue; } @@ -755,8 +747,7 @@ static int __devinit hp_wmi_rfkill2_setup(struct platform_device *device) IS_HWBLOCKED(state.device[i].power)); if (!(state.device[i].power & HPWMI_POWER_BIOS)) - printk(KERN_INFO PREFIX "device %s blocked by BIOS\n", - name); + pr_info("device %s blocked by BIOS\n", name); err = rfkill_register(rfkill); if (err) { diff --git a/drivers/platform/x86/ibm_rtl.c b/drivers/platform/x86/ibm_rtl.c index b1396e5b295..811d436cd67 100644 --- a/drivers/platform/x86/ibm_rtl.c +++ b/drivers/platform/x86/ibm_rtl.c @@ -22,6 +22,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> 
#include <linux/delay.h> #include <linux/module.h> @@ -69,9 +71,10 @@ struct ibm_rtl_table { #define RTL_SIGNATURE 0x0000005f4c54525fULL #define RTL_MASK 0x000000ffffffffffULL -#define RTL_DEBUG(A, ...) do { \ - if (debug) \ - pr_info("ibm-rtl: " A, ##__VA_ARGS__ ); \ +#define RTL_DEBUG(fmt, ...) \ +do { \ + if (debug) \ + pr_info(fmt, ##__VA_ARGS__); \ } while (0) static DEFINE_MUTEX(rtl_lock); @@ -114,7 +117,7 @@ static int ibm_rtl_write(u8 value) int ret = 0, count = 0; static u32 cmd_port_val; - RTL_DEBUG("%s(%d)\n", __FUNCTION__, value); + RTL_DEBUG("%s(%d)\n", __func__, value); value = value == 1 ? RTL_CMD_ENTER_PRTM : RTL_CMD_EXIT_PRTM; @@ -144,8 +147,8 @@ static int ibm_rtl_write(u8 value) while (ioread8(&rtl_table->command)) { msleep(10); if (count++ > 500) { - pr_err("ibm-rtl: Hardware not responding to " - "mode switch request\n"); + pr_err("Hardware not responding to " + "mode switch request\n"); ret = -EIO; break; } @@ -250,7 +253,7 @@ static int __init ibm_rtl_init(void) { int ret = -ENODEV, i; if (force) - pr_warning("ibm-rtl: module loaded by force\n"); + pr_warn("module loaded by force\n"); /* first ensure that we are running on IBM HW */ else if (efi_enabled || !dmi_check_system(ibm_rtl_dmi_table)) return -ENODEV; @@ -288,19 +291,19 @@ static int __init ibm_rtl_init(void) { if ((readq(&tmp->signature) & RTL_MASK) == RTL_SIGNATURE) { phys_addr_t addr; unsigned int plen; - RTL_DEBUG("found RTL_SIGNATURE at %#llx\n", (u64)tmp); + RTL_DEBUG("found RTL_SIGNATURE at %p\n", tmp); rtl_table = tmp; /* The address, value, width and offset are platform * dependent and found in the ibm_rtl_table */ rtl_cmd_width = ioread8(&rtl_table->cmd_granularity); rtl_cmd_type = ioread8(&rtl_table->cmd_address_type); RTL_DEBUG("rtl_cmd_width = %u, rtl_cmd_type = %u\n", - rtl_cmd_width, rtl_cmd_type); + rtl_cmd_width, rtl_cmd_type); addr = ioread32(&rtl_table->cmd_port_address); RTL_DEBUG("addr = %#llx\n", (unsigned long long)addr); plen = rtl_cmd_width/sizeof(char); 
rtl_cmd_addr = rtl_port_map(addr, plen); - RTL_DEBUG("rtl_cmd_addr = %#llx\n", (u64)rtl_cmd_addr); + RTL_DEBUG("rtl_cmd_addr = %p\n", rtl_cmd_addr); if (!rtl_cmd_addr) { ret = -ENOMEM; break; diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 21b101899ba..bfdda33feb2 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -20,6 +20,8 @@ * 02110-1301, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> diff --git a/drivers/platform/x86/intel_menlow.c b/drivers/platform/x86/intel_menlow.c index eacd5da7dd2..809adea4965 100644 --- a/drivers/platform/x86/intel_menlow.c +++ b/drivers/platform/x86/intel_menlow.c @@ -27,6 +27,8 @@ * to get/set bandwidth. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -135,8 +137,7 @@ static int memory_set_cur_bandwidth(struct thermal_cooling_device *cdev, acpi_evaluate_integer(handle, MEMORY_SET_BANDWIDTH, &arg_list, &temp); - printk(KERN_INFO - "Bandwidth value was %ld: status is %d\n", state, status); + pr_info("Bandwidth value was %ld: status is %d\n", state, status); if (ACPI_FAILURE(status)) return -EFAULT; diff --git a/drivers/platform/x86/intel_mid_powerbtn.c b/drivers/platform/x86/intel_mid_powerbtn.c index 213e79ba68d..f1ae5078b7e 100644 --- a/drivers/platform/x86/intel_mid_powerbtn.c +++ b/drivers/platform/x86/intel_mid_powerbtn.c @@ -23,58 +23,48 @@ #include <linux/slab.h> #include <linux/platform_device.h> #include <linux/input.h> + #include <asm/intel_scu_ipc.h> #define DRIVER_NAME "msic_power_btn" -#define MSIC_IRQ_STAT 0x02 - #define MSIC_IRQ_PB (1 << 0) -#define MSIC_PB_CONFIG 0x3e #define MSIC_PB_STATUS 0x3f - #define MSIC_PB_LEVEL (1 << 3) /* 1 - release, 0 - press */ - -struct mfld_pb_priv { - struct input_dev *input; - unsigned int irq; -}; +#define MSIC_PB_LEVEL (1 << 3) /* 
1 - release, 0 - press */ static irqreturn_t mfld_pb_isr(int irq, void *dev_id) { - struct mfld_pb_priv *priv = dev_id; + struct input_dev *input = dev_id; int ret; u8 pbstat; ret = intel_scu_ipc_ioread8(MSIC_PB_STATUS, &pbstat); - if (ret < 0) - return IRQ_HANDLED; - - input_event(priv->input, EV_KEY, KEY_POWER, !(pbstat & MSIC_PB_LEVEL)); - input_sync(priv->input); + if (ret < 0) { + dev_err(input->dev.parent, "Read error %d while reading" + " MSIC_PB_STATUS\n", ret); + } else { + input_event(input, EV_KEY, KEY_POWER, + !(pbstat & MSIC_PB_LEVEL)); + input_sync(input); + } return IRQ_HANDLED; } static int __devinit mfld_pb_probe(struct platform_device *pdev) { - struct mfld_pb_priv *priv; struct input_dev *input; - int irq; + int irq = platform_get_irq(pdev, 0); int error; - irq = platform_get_irq(pdev, 0); if (irq < 0) return -EINVAL; - priv = kzalloc(sizeof(struct mfld_pb_priv), GFP_KERNEL); input = input_allocate_device(); - if (!priv || !input) { - error = -ENOMEM; - goto err_free_mem; + if (!input) { + dev_err(&pdev->dev, "Input device allocation error\n"); + return -ENOMEM; } - priv->input = input; - priv->irq = irq; - input->name = pdev->name; input->phys = "power-button/input0"; input->id.bustype = BUS_HOST; @@ -82,42 +72,40 @@ static int __devinit mfld_pb_probe(struct platform_device *pdev) input_set_capability(input, EV_KEY, KEY_POWER); - error = request_threaded_irq(priv->irq, NULL, mfld_pb_isr, - 0, DRIVER_NAME, priv); + error = request_threaded_irq(irq, NULL, mfld_pb_isr, 0, + DRIVER_NAME, input); if (error) { - dev_err(&pdev->dev, - "unable to request irq %d for mfld power button\n", - irq); - goto err_free_mem; + dev_err(&pdev->dev, "Unable to request irq %d for mfld power" + "button\n", irq); + goto err_free_input; } error = input_register_device(input); if (error) { - dev_err(&pdev->dev, - "unable to register input dev, error %d\n", error); + dev_err(&pdev->dev, "Unable to register input dev, error " + "%d\n", error); goto err_free_irq; } - 
platform_set_drvdata(pdev, priv); + platform_set_drvdata(pdev, input); return 0; err_free_irq: - free_irq(priv->irq, priv); -err_free_mem: + free_irq(irq, input); +err_free_input: input_free_device(input); - kfree(priv); return error; } static int __devexit mfld_pb_remove(struct platform_device *pdev) { - struct mfld_pb_priv *priv = platform_get_drvdata(pdev); - - free_irq(priv->irq, priv); - input_unregister_device(priv->input); - kfree(priv); + struct input_dev *input = platform_get_drvdata(pdev); + int irq = platform_get_irq(pdev, 0); + free_irq(irq, input); + input_unregister_device(input); platform_set_drvdata(pdev, NULL); + return 0; } diff --git a/drivers/platform/x86/intel_mid_thermal.c b/drivers/platform/x86/intel_mid_thermal.c index c2f4bd8013b..3a578323122 100644 --- a/drivers/platform/x86/intel_mid_thermal.c +++ b/drivers/platform/x86/intel_mid_thermal.c @@ -37,49 +37,50 @@ #include <asm/intel_scu_ipc.h> /* Number of thermal sensors */ -#define MSIC_THERMAL_SENSORS 4 +#define MSIC_THERMAL_SENSORS 4 /* ADC1 - thermal registers */ -#define MSIC_THERM_ADC1CNTL1 0x1C0 -#define MSIC_ADC_ENBL 0x10 -#define MSIC_ADC_START 0x08 +#define MSIC_THERM_ADC1CNTL1 0x1C0 +#define MSIC_ADC_ENBL 0x10 +#define MSIC_ADC_START 0x08 -#define MSIC_THERM_ADC1CNTL3 0x1C2 -#define MSIC_ADCTHERM_ENBL 0x04 -#define MSIC_ADCRRDATA_ENBL 0x05 -#define MSIC_CHANL_MASK_VAL 0x0F +#define MSIC_THERM_ADC1CNTL3 0x1C2 +#define MSIC_ADCTHERM_ENBL 0x04 +#define MSIC_ADCRRDATA_ENBL 0x05 +#define MSIC_CHANL_MASK_VAL 0x0F -#define MSIC_STOPBIT_MASK 16 -#define MSIC_ADCTHERM_MASK 4 -#define ADC_CHANLS_MAX 15 /* Number of ADC channels */ -#define ADC_LOOP_MAX (ADC_CHANLS_MAX - MSIC_THERMAL_SENSORS) +#define MSIC_STOPBIT_MASK 16 +#define MSIC_ADCTHERM_MASK 4 +/* Number of ADC channels */ +#define ADC_CHANLS_MAX 15 +#define ADC_LOOP_MAX (ADC_CHANLS_MAX - MSIC_THERMAL_SENSORS) /* ADC channel code values */ -#define SKIN_SENSOR0_CODE 0x08 -#define SKIN_SENSOR1_CODE 0x09 -#define SYS_SENSOR_CODE 0x0A 
-#define MSIC_DIE_SENSOR_CODE 0x03 +#define SKIN_SENSOR0_CODE 0x08 +#define SKIN_SENSOR1_CODE 0x09 +#define SYS_SENSOR_CODE 0x0A +#define MSIC_DIE_SENSOR_CODE 0x03 -#define SKIN_THERM_SENSOR0 0 -#define SKIN_THERM_SENSOR1 1 -#define SYS_THERM_SENSOR2 2 -#define MSIC_DIE_THERM_SENSOR3 3 +#define SKIN_THERM_SENSOR0 0 +#define SKIN_THERM_SENSOR1 1 +#define SYS_THERM_SENSOR2 2 +#define MSIC_DIE_THERM_SENSOR3 3 /* ADC code range */ -#define ADC_MAX 977 -#define ADC_MIN 162 -#define ADC_VAL0C 887 -#define ADC_VAL20C 720 -#define ADC_VAL40C 508 -#define ADC_VAL60C 315 +#define ADC_MAX 977 +#define ADC_MIN 162 +#define ADC_VAL0C 887 +#define ADC_VAL20C 720 +#define ADC_VAL40C 508 +#define ADC_VAL60C 315 /* ADC base addresses */ -#define ADC_CHNL_START_ADDR 0x1C5 /* increments by 1 */ -#define ADC_DATA_START_ADDR 0x1D4 /* increments by 2 */ +#define ADC_CHNL_START_ADDR 0x1C5 /* increments by 1 */ +#define ADC_DATA_START_ADDR 0x1D4 /* increments by 2 */ /* MSIC die attributes */ -#define MSIC_DIE_ADC_MIN 488 -#define MSIC_DIE_ADC_MAX 1004 +#define MSIC_DIE_ADC_MIN 488 +#define MSIC_DIE_ADC_MAX 1004 /* This holds the address of the first free ADC channel, * among the 15 channels @@ -87,15 +88,15 @@ static int channel_index; struct platform_info { - struct platform_device *pdev; - struct thermal_zone_device *tzd[MSIC_THERMAL_SENSORS]; + struct platform_device *pdev; + struct thermal_zone_device *tzd[MSIC_THERMAL_SENSORS]; }; struct thermal_device_info { - unsigned int chnl_addr; - int direct; - /* This holds the current temperature in millidegree celsius */ - long curr_temp; + unsigned int chnl_addr; + int direct; + /* This holds the current temperature in millidegree celsius */ + long curr_temp; }; /** @@ -106,7 +107,7 @@ struct thermal_device_info { */ static int to_msic_die_temp(uint16_t adc_val) { - return (368 * (adc_val) / 1000) - 220; + return (368 * (adc_val) / 1000) - 220; } /** @@ -118,7 +119,7 @@ static int to_msic_die_temp(uint16_t adc_val) */ static int 
is_valid_adc(uint16_t adc_val, uint16_t min, uint16_t max) { - return (adc_val >= min) && (adc_val <= max); + return (adc_val >= min) && (adc_val <= max); } /** @@ -136,35 +137,35 @@ static int is_valid_adc(uint16_t adc_val, uint16_t min, uint16_t max) */ static int adc_to_temp(int direct, uint16_t adc_val, unsigned long *tp) { - int temp; - - /* Direct conversion for die temperature */ - if (direct) { - if (is_valid_adc(adc_val, MSIC_DIE_ADC_MIN, MSIC_DIE_ADC_MAX)) { - *tp = to_msic_die_temp(adc_val) * 1000; - return 0; - } - return -ERANGE; - } - - if (!is_valid_adc(adc_val, ADC_MIN, ADC_MAX)) - return -ERANGE; - - /* Linear approximation for skin temperature */ - if (adc_val > ADC_VAL0C) - temp = 177 - (adc_val/5); - else if ((adc_val <= ADC_VAL0C) && (adc_val > ADC_VAL20C)) - temp = 111 - (adc_val/8); - else if ((adc_val <= ADC_VAL20C) && (adc_val > ADC_VAL40C)) - temp = 92 - (adc_val/10); - else if ((adc_val <= ADC_VAL40C) && (adc_val > ADC_VAL60C)) - temp = 91 - (adc_val/10); - else - temp = 112 - (adc_val/6); - - /* Convert temperature in celsius to milli degree celsius */ - *tp = temp * 1000; - return 0; + int temp; + + /* Direct conversion for die temperature */ + if (direct) { + if (is_valid_adc(adc_val, MSIC_DIE_ADC_MIN, MSIC_DIE_ADC_MAX)) { + *tp = to_msic_die_temp(adc_val) * 1000; + return 0; + } + return -ERANGE; + } + + if (!is_valid_adc(adc_val, ADC_MIN, ADC_MAX)) + return -ERANGE; + + /* Linear approximation for skin temperature */ + if (adc_val > ADC_VAL0C) + temp = 177 - (adc_val/5); + else if ((adc_val <= ADC_VAL0C) && (adc_val > ADC_VAL20C)) + temp = 111 - (adc_val/8); + else if ((adc_val <= ADC_VAL20C) && (adc_val > ADC_VAL40C)) + temp = 92 - (adc_val/10); + else if ((adc_val <= ADC_VAL40C) && (adc_val > ADC_VAL60C)) + temp = 91 - (adc_val/10); + else + temp = 112 - (adc_val/6); + + /* Convert temperature in celsius to milli degree celsius */ + *tp = temp * 1000; + return 0; } /** @@ -178,47 +179,47 @@ static int adc_to_temp(int direct, 
uint16_t adc_val, unsigned long *tp) */ static int mid_read_temp(struct thermal_zone_device *tzd, unsigned long *temp) { - struct thermal_device_info *td_info = tzd->devdata; - uint16_t adc_val, addr; - uint8_t data = 0; - int ret; - unsigned long curr_temp; - - - addr = td_info->chnl_addr; - - /* Enable the msic for conversion before reading */ - ret = intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL3, MSIC_ADCRRDATA_ENBL); - if (ret) - return ret; - - /* Re-toggle the RRDATARD bit (temporary workaround) */ - ret = intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL3, MSIC_ADCTHERM_ENBL); - if (ret) - return ret; - - /* Read the higher bits of data */ - ret = intel_scu_ipc_ioread8(addr, &data); - if (ret) - return ret; - - /* Shift bits to accommodate the lower two data bits */ - adc_val = (data << 2); - addr++; - - ret = intel_scu_ipc_ioread8(addr, &data);/* Read lower bits */ - if (ret) - return ret; - - /* Adding lower two bits to the higher bits */ - data &= 03; - adc_val += data; - - /* Convert ADC value to temperature */ - ret = adc_to_temp(td_info->direct, adc_val, &curr_temp); - if (ret == 0) - *temp = td_info->curr_temp = curr_temp; - return ret; + struct thermal_device_info *td_info = tzd->devdata; + uint16_t adc_val, addr; + uint8_t data = 0; + int ret; + unsigned long curr_temp; + + + addr = td_info->chnl_addr; + + /* Enable the msic for conversion before reading */ + ret = intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL3, MSIC_ADCRRDATA_ENBL); + if (ret) + return ret; + + /* Re-toggle the RRDATARD bit (temporary workaround) */ + ret = intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL3, MSIC_ADCTHERM_ENBL); + if (ret) + return ret; + + /* Read the higher bits of data */ + ret = intel_scu_ipc_ioread8(addr, &data); + if (ret) + return ret; + + /* Shift bits to accommodate the lower two data bits */ + adc_val = (data << 2); + addr++; + + ret = intel_scu_ipc_ioread8(addr, &data);/* Read lower bits */ + if (ret) + return ret; + + /* Adding lower two bits to the higher bits */ + data 
&= 03; + adc_val += data; + + /* Convert ADC value to temperature */ + ret = adc_to_temp(td_info->direct, adc_val, &curr_temp); + if (ret == 0) + *temp = td_info->curr_temp = curr_temp; + return ret; } /** @@ -231,22 +232,21 @@ static int mid_read_temp(struct thermal_zone_device *tzd, unsigned long *temp) */ static int configure_adc(int val) { - int ret; - uint8_t data; - - ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL1, &data); - if (ret) - return ret; - - if (val) { - /* Enable and start the ADC */ - data |= (MSIC_ADC_ENBL | MSIC_ADC_START); - } else { - /* Just stop the ADC */ - data &= (~MSIC_ADC_START); - } - - return intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL1, data); + int ret; + uint8_t data; + + ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL1, &data); + if (ret) + return ret; + + if (val) { + /* Enable and start the ADC */ + data |= (MSIC_ADC_ENBL | MSIC_ADC_START); + } else { + /* Just stop the ADC */ + data &= (~MSIC_ADC_START); + } + return intel_scu_ipc_iowrite8(MSIC_THERM_ADC1CNTL1, data); } /** @@ -259,30 +259,30 @@ static int configure_adc(int val) */ static int set_up_therm_channel(u16 base_addr) { - int ret; - - /* Enable all the sensor channels */ - ret = intel_scu_ipc_iowrite8(base_addr, SKIN_SENSOR0_CODE); - if (ret) - return ret; - - ret = intel_scu_ipc_iowrite8(base_addr + 1, SKIN_SENSOR1_CODE); - if (ret) - return ret; - - ret = intel_scu_ipc_iowrite8(base_addr + 2, SYS_SENSOR_CODE); - if (ret) - return ret; - - /* Since this is the last channel, set the stop bit - to 1 by ORing the DIE_SENSOR_CODE with 0x10 */ - ret = intel_scu_ipc_iowrite8(base_addr + 3, - (MSIC_DIE_SENSOR_CODE | 0x10)); - if (ret) - return ret; - - /* Enable ADC and start it */ - return configure_adc(1); + int ret; + + /* Enable all the sensor channels */ + ret = intel_scu_ipc_iowrite8(base_addr, SKIN_SENSOR0_CODE); + if (ret) + return ret; + + ret = intel_scu_ipc_iowrite8(base_addr + 1, SKIN_SENSOR1_CODE); + if (ret) + return ret; + + ret = 
intel_scu_ipc_iowrite8(base_addr + 2, SYS_SENSOR_CODE); + if (ret) + return ret; + + /* Since this is the last channel, set the stop bit + * to 1 by ORing the DIE_SENSOR_CODE with 0x10 */ + ret = intel_scu_ipc_iowrite8(base_addr + 3, + (MSIC_DIE_SENSOR_CODE | 0x10)); + if (ret) + return ret; + + /* Enable ADC and start it */ + return configure_adc(1); } /** @@ -293,13 +293,13 @@ static int set_up_therm_channel(u16 base_addr) */ static int reset_stopbit(uint16_t addr) { - int ret; - uint8_t data; - ret = intel_scu_ipc_ioread8(addr, &data); - if (ret) - return ret; - /* Set the stop bit to zero */ - return intel_scu_ipc_iowrite8(addr, (data & 0xEF)); + int ret; + uint8_t data; + ret = intel_scu_ipc_ioread8(addr, &data); + if (ret) + return ret; + /* Set the stop bit to zero */ + return intel_scu_ipc_iowrite8(addr, (data & 0xEF)); } /** @@ -317,30 +317,30 @@ static int reset_stopbit(uint16_t addr) */ static int find_free_channel(void) { - int ret; - int i; - uint8_t data; - - /* check whether ADC is enabled */ - ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL1, &data); - if (ret) - return ret; - - if ((data & MSIC_ADC_ENBL) == 0) - return 0; - - /* ADC is already enabled; Looking for an empty channel */ - for (i = 0; i < ADC_CHANLS_MAX; i++) { - ret = intel_scu_ipc_ioread8(ADC_CHNL_START_ADDR + i, &data); - if (ret) - return ret; - - if (data & MSIC_STOPBIT_MASK) { - ret = i; - break; - } - } - return (ret > ADC_LOOP_MAX) ? (-EINVAL) : ret; + int ret; + int i; + uint8_t data; + + /* check whether ADC is enabled */ + ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL1, &data); + if (ret) + return ret; + + if ((data & MSIC_ADC_ENBL) == 0) + return 0; + + /* ADC is already enabled; Looking for an empty channel */ + for (i = 0; i < ADC_CHANLS_MAX; i++) { + ret = intel_scu_ipc_ioread8(ADC_CHNL_START_ADDR + i, &data); + if (ret) + return ret; + + if (data & MSIC_STOPBIT_MASK) { + ret = i; + break; + } + } + return (ret > ADC_LOOP_MAX) ? 
(-EINVAL) : ret; } /** @@ -351,48 +351,48 @@ static int find_free_channel(void) */ static int mid_initialize_adc(struct device *dev) { - u8 data; - u16 base_addr; - int ret; - - /* - * Ensure that adctherm is disabled before we - * initialize the ADC - */ - ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL3, &data); - if (ret) - return ret; - - if (data & MSIC_ADCTHERM_MASK) - dev_warn(dev, "ADCTHERM already set"); - - /* Index of the first channel in which the stop bit is set */ - channel_index = find_free_channel(); - if (channel_index < 0) { - dev_err(dev, "No free ADC channels"); - return channel_index; - } - - base_addr = ADC_CHNL_START_ADDR + channel_index; - - if (!(channel_index == 0 || channel_index == ADC_LOOP_MAX)) { - /* Reset stop bit for channels other than 0 and 12 */ - ret = reset_stopbit(base_addr); - if (ret) - return ret; - - /* Index of the first free channel */ - base_addr++; - channel_index++; - } - - ret = set_up_therm_channel(base_addr); - if (ret) { - dev_err(dev, "unable to enable ADC"); - return ret; - } - dev_dbg(dev, "ADC initialization successful"); - return ret; + u8 data; + u16 base_addr; + int ret; + + /* + * Ensure that adctherm is disabled before we + * initialize the ADC + */ + ret = intel_scu_ipc_ioread8(MSIC_THERM_ADC1CNTL3, &data); + if (ret) + return ret; + + if (data & MSIC_ADCTHERM_MASK) + dev_warn(dev, "ADCTHERM already set"); + + /* Index of the first channel in which the stop bit is set */ + channel_index = find_free_channel(); + if (channel_index < 0) { + dev_err(dev, "No free ADC channels"); + return channel_index; + } + + base_addr = ADC_CHNL_START_ADDR + channel_index; + + if (!(channel_index == 0 || channel_index == ADC_LOOP_MAX)) { + /* Reset stop bit for channels other than 0 and 12 */ + ret = reset_stopbit(base_addr); + if (ret) + return ret; + + /* Index of the first free channel */ + base_addr++; + channel_index++; + } + + ret = set_up_therm_channel(base_addr); + if (ret) { + dev_err(dev, "unable to enable ADC"); 
+ return ret; + } + dev_dbg(dev, "ADC initialization successful"); + return ret; } /** @@ -403,18 +403,18 @@ static int mid_initialize_adc(struct device *dev) */ static struct thermal_device_info *initialize_sensor(int index) { - struct thermal_device_info *td_info = - kzalloc(sizeof(struct thermal_device_info), GFP_KERNEL); - - if (!td_info) - return NULL; - - /* Set the base addr of the channel for this sensor */ - td_info->chnl_addr = ADC_DATA_START_ADDR + 2 * (channel_index + index); - /* Sensor 3 is direct conversion */ - if (index == 3) - td_info->direct = 1; - return td_info; + struct thermal_device_info *td_info = + kzalloc(sizeof(struct thermal_device_info), GFP_KERNEL); + + if (!td_info) + return NULL; + + /* Set the base addr of the channel for this sensor */ + td_info->chnl_addr = ADC_DATA_START_ADDR + 2 * (channel_index + index); + /* Sensor 3 is direct conversion */ + if (index == 3) + td_info->direct = 1; + return td_info; } /** @@ -425,7 +425,7 @@ static struct thermal_device_info *initialize_sensor(int index) */ static int mid_thermal_resume(struct platform_device *pdev) { - return mid_initialize_adc(&pdev->dev); + return mid_initialize_adc(&pdev->dev); } /** @@ -437,12 +437,12 @@ static int mid_thermal_resume(struct platform_device *pdev) */ static int mid_thermal_suspend(struct platform_device *pdev, pm_message_t mesg) { - /* - * This just stops the ADC and does not disable it. - * temporary workaround until we have a generic ADC driver. - * If 0 is passed, it disables the ADC. - */ - return configure_adc(0); + /* + * This just stops the ADC and does not disable it. + * temporary workaround until we have a generic ADC driver. + * If 0 is passed, it disables the ADC. 
+ */ + return configure_adc(0); } /** @@ -453,16 +453,15 @@ static int mid_thermal_suspend(struct platform_device *pdev, pm_message_t mesg) */ static int read_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp) { - WARN_ON(tzd == NULL); - return mid_read_temp(tzd, temp); + WARN_ON(tzd == NULL); + return mid_read_temp(tzd, temp); } /* Can't be const */ static struct thermal_zone_device_ops tzd_ops = { - .get_temp = read_curr_temp, + .get_temp = read_curr_temp, }; - /** * mid_thermal_probe - mfld thermal initialize * @pdev: platform device structure @@ -472,46 +471,45 @@ static struct thermal_zone_device_ops tzd_ops = { */ static int mid_thermal_probe(struct platform_device *pdev) { - static char *name[MSIC_THERMAL_SENSORS] = { - "skin0", "skin1", "sys", "msicdie" - }; - - int ret; - int i; - struct platform_info *pinfo; - - pinfo = kzalloc(sizeof(struct platform_info), GFP_KERNEL); - if (!pinfo) - return -ENOMEM; - - /* Initializing the hardware */ - ret = mid_initialize_adc(&pdev->dev); - if (ret) { - dev_err(&pdev->dev, "ADC init failed"); - kfree(pinfo); - return ret; - } - - /* Register each sensor with the generic thermal framework*/ - for (i = 0; i < MSIC_THERMAL_SENSORS; i++) { - pinfo->tzd[i] = thermal_zone_device_register(name[i], - 0, initialize_sensor(i), - &tzd_ops, 0, 0, 0, 0); - if (IS_ERR(pinfo->tzd[i])) - goto reg_fail; - } - - pinfo->pdev = pdev; - platform_set_drvdata(pdev, pinfo); - return 0; + static char *name[MSIC_THERMAL_SENSORS] = { + "skin0", "skin1", "sys", "msicdie" + }; + + int ret; + int i; + struct platform_info *pinfo; + + pinfo = kzalloc(sizeof(struct platform_info), GFP_KERNEL); + if (!pinfo) + return -ENOMEM; + + /* Initializing the hardware */ + ret = mid_initialize_adc(&pdev->dev); + if (ret) { + dev_err(&pdev->dev, "ADC init failed"); + kfree(pinfo); + return ret; + } + + /* Register each sensor with the generic thermal framework*/ + for (i = 0; i < MSIC_THERMAL_SENSORS; i++) { + pinfo->tzd[i] = 
thermal_zone_device_register(name[i], + 0, initialize_sensor(i), &tzd_ops, 0, 0, 0, 0); + if (IS_ERR(pinfo->tzd[i])) + goto reg_fail; + } + + pinfo->pdev = pdev; + platform_set_drvdata(pdev, pinfo); + return 0; reg_fail: - ret = PTR_ERR(pinfo->tzd[i]); - while (--i >= 0) - thermal_zone_device_unregister(pinfo->tzd[i]); - configure_adc(0); - kfree(pinfo); - return ret; + ret = PTR_ERR(pinfo->tzd[i]); + while (--i >= 0) + thermal_zone_device_unregister(pinfo->tzd[i]); + configure_adc(0); + kfree(pinfo); + return ret; } /** @@ -523,49 +521,46 @@ reg_fail: */ static int mid_thermal_remove(struct platform_device *pdev) { - int i; - struct platform_info *pinfo = platform_get_drvdata(pdev); + int i; + struct platform_info *pinfo = platform_get_drvdata(pdev); - for (i = 0; i < MSIC_THERMAL_SENSORS; i++) - thermal_zone_device_unregister(pinfo->tzd[i]); + for (i = 0; i < MSIC_THERMAL_SENSORS; i++) + thermal_zone_device_unregister(pinfo->tzd[i]); - platform_set_drvdata(pdev, NULL); + kfree(pinfo); + platform_set_drvdata(pdev, NULL); - /* Stop the ADC */ - return configure_adc(0); + /* Stop the ADC */ + return configure_adc(0); } -/********************************************************************* - * Driver initialisation and finalization - *********************************************************************/ - #define DRIVER_NAME "msic_sensor" static const struct platform_device_id therm_id_table[] = { - { DRIVER_NAME, 1 }, - { } + { DRIVER_NAME, 1 }, + { } }; static struct platform_driver mid_thermal_driver = { - .driver = { - .name = DRIVER_NAME, - .owner = THIS_MODULE, - }, - .probe = mid_thermal_probe, - .suspend = mid_thermal_suspend, - .resume = mid_thermal_resume, - .remove = __devexit_p(mid_thermal_remove), - .id_table = therm_id_table, + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, + .probe = mid_thermal_probe, + .suspend = mid_thermal_suspend, + .resume = mid_thermal_resume, + .remove = __devexit_p(mid_thermal_remove), + .id_table = 
therm_id_table, }; static int __init mid_thermal_module_init(void) { - return platform_driver_register(&mid_thermal_driver); + return platform_driver_register(&mid_thermal_driver); } static void __exit mid_thermal_module_exit(void) { - platform_driver_unregister(&mid_thermal_driver); + platform_driver_unregister(&mid_thermal_driver); } module_init(mid_thermal_module_init); diff --git a/drivers/platform/x86/intel_oaktrail.c b/drivers/platform/x86/intel_oaktrail.c new file mode 100644 index 00000000000..e936364a609 --- /dev/null +++ b/drivers/platform/x86/intel_oaktrail.c @@ -0,0 +1,396 @@ +/* + * intel_oaktrail.c - Intel OakTrail Platform support. + * + * Copyright (C) 2010-2011 Intel Corporation + * Author: Yin Kangkai (kangkai.yin@intel.com) + * + * based on Compal driver, Copyright (C) 2008 Cezary Jackiewicz + * <cezary.jackiewicz (at) gmail.com>, based on MSI driver + * Copyright (C) 2006 Lennart Poettering <mzxreary (at) 0pointer (dot) de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + * + * This driver does below things: + * 1. registers itself in the Linux backlight control in + * /sys/class/backlight/intel_oaktrail/ + * + * 2. 
registers in the rfkill subsystem here: /sys/class/rfkill/rfkillX/ + * for these components: wifi, bluetooth, wwan (3g), gps + * + * This driver might work on other products based on Oaktrail. If you + * want to try it you can pass force=1 as argument to the module which + * will force it to load even when the DMI data doesn't identify the + * product as compatible. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/acpi.h> +#include <linux/fb.h> +#include <linux/mutex.h> +#include <linux/err.h> +#include <linux/i2c.h> +#include <linux/backlight.h> +#include <linux/platform_device.h> +#include <linux/dmi.h> +#include <linux/rfkill.h> +#include <acpi/acpi_bus.h> +#include <acpi/acpi_drivers.h> + + +#define DRIVER_NAME "intel_oaktrail" +#define DRIVER_VERSION "0.4ac1" + +/* + * This is the devices status address in EC space, and the control bits + * definition: + * + * (1 << 0): Camera enable/disable, RW. + * (1 << 1): Bluetooth enable/disable, RW. + * (1 << 2): GPS enable/disable, RW. + * (1 << 3): WiFi enable/disable, RW. + * (1 << 4): WWAN (3G) enable/disalbe, RW. + * (1 << 5): Touchscreen enable/disable, Read Only. + */ +#define OT_EC_DEVICE_STATE_ADDRESS 0xD6 + +#define OT_EC_CAMERA_MASK (1 << 0) +#define OT_EC_BT_MASK (1 << 1) +#define OT_EC_GPS_MASK (1 << 2) +#define OT_EC_WIFI_MASK (1 << 3) +#define OT_EC_WWAN_MASK (1 << 4) +#define OT_EC_TS_MASK (1 << 5) + +/* + * This is the address in EC space and commands used to control LCD backlight: + * + * Two steps needed to change the LCD backlight: + * 1. write the backlight percentage into OT_EC_BL_BRIGHTNESS_ADDRESS; + * 2. write OT_EC_BL_CONTROL_ON_DATA into OT_EC_BL_CONTROL_ADDRESS. + * + * To read the LCD back light, just read out the value from + * OT_EC_BL_BRIGHTNESS_ADDRESS. 
+ * + * LCD backlight brightness range: 0 - 100 (OT_EC_BL_BRIGHTNESS_MAX) + */ +#define OT_EC_BL_BRIGHTNESS_ADDRESS 0x44 +#define OT_EC_BL_BRIGHTNESS_MAX 100 +#define OT_EC_BL_CONTROL_ADDRESS 0x3A +#define OT_EC_BL_CONTROL_ON_DATA 0x1A + + +static int force; +module_param(force, bool, 0); +MODULE_PARM_DESC(force, "Force driver load, ignore DMI data"); + +static struct platform_device *oaktrail_device; +static struct backlight_device *oaktrail_bl_device; +static struct rfkill *bt_rfkill; +static struct rfkill *gps_rfkill; +static struct rfkill *wifi_rfkill; +static struct rfkill *wwan_rfkill; + + +/* rfkill */ +static int oaktrail_rfkill_set(void *data, bool blocked) +{ + u8 value; + u8 result; + unsigned long radio = (unsigned long) data; + + ec_read(OT_EC_DEVICE_STATE_ADDRESS, &result); + + if (!blocked) + value = (u8) (result | radio); + else + value = (u8) (result & ~radio); + + ec_write(OT_EC_DEVICE_STATE_ADDRESS, value); + + return 0; +} + +static const struct rfkill_ops oaktrail_rfkill_ops = { + .set_block = oaktrail_rfkill_set, +}; + +static struct rfkill *oaktrail_rfkill_new(char *name, enum rfkill_type type, + unsigned long mask) +{ + struct rfkill *rfkill_dev; + u8 value; + int err; + + rfkill_dev = rfkill_alloc(name, &oaktrail_device->dev, type, + &oaktrail_rfkill_ops, (void *)mask); + if (!rfkill_dev) + return ERR_PTR(-ENOMEM); + + ec_read(OT_EC_DEVICE_STATE_ADDRESS, &value); + rfkill_init_sw_state(rfkill_dev, (value & mask) != 1); + + err = rfkill_register(rfkill_dev); + if (err) { + rfkill_destroy(rfkill_dev); + return ERR_PTR(err); + } + + return rfkill_dev; +} + +static inline void __oaktrail_rfkill_cleanup(struct rfkill *rf) +{ + if (rf) { + rfkill_unregister(rf); + rfkill_destroy(rf); + } +} + +static void oaktrail_rfkill_cleanup(void) +{ + __oaktrail_rfkill_cleanup(wifi_rfkill); + __oaktrail_rfkill_cleanup(bt_rfkill); + __oaktrail_rfkill_cleanup(gps_rfkill); + __oaktrail_rfkill_cleanup(wwan_rfkill); +} + +static int oaktrail_rfkill_init(void) +{ 
+ int ret; + + wifi_rfkill = oaktrail_rfkill_new("oaktrail-wifi", + RFKILL_TYPE_WLAN, + OT_EC_WIFI_MASK); + if (IS_ERR(wifi_rfkill)) { + ret = PTR_ERR(wifi_rfkill); + wifi_rfkill = NULL; + goto cleanup; + } + + bt_rfkill = oaktrail_rfkill_new("oaktrail-bluetooth", + RFKILL_TYPE_BLUETOOTH, + OT_EC_BT_MASK); + if (IS_ERR(bt_rfkill)) { + ret = PTR_ERR(bt_rfkill); + bt_rfkill = NULL; + goto cleanup; + } + + gps_rfkill = oaktrail_rfkill_new("oaktrail-gps", + RFKILL_TYPE_GPS, + OT_EC_GPS_MASK); + if (IS_ERR(gps_rfkill)) { + ret = PTR_ERR(gps_rfkill); + gps_rfkill = NULL; + goto cleanup; + } + + wwan_rfkill = oaktrail_rfkill_new("oaktrail-wwan", + RFKILL_TYPE_WWAN, + OT_EC_WWAN_MASK); + if (IS_ERR(wwan_rfkill)) { + ret = PTR_ERR(wwan_rfkill); + wwan_rfkill = NULL; + goto cleanup; + } + + return 0; + +cleanup: + oaktrail_rfkill_cleanup(); + return ret; +} + + +/* backlight */ +static int get_backlight_brightness(struct backlight_device *b) +{ + u8 value; + ec_read(OT_EC_BL_BRIGHTNESS_ADDRESS, &value); + + return value; +} + +static int set_backlight_brightness(struct backlight_device *b) +{ + u8 percent = (u8) b->props.brightness; + if (percent < 0 || percent > OT_EC_BL_BRIGHTNESS_MAX) + return -EINVAL; + + ec_write(OT_EC_BL_BRIGHTNESS_ADDRESS, percent); + ec_write(OT_EC_BL_CONTROL_ADDRESS, OT_EC_BL_CONTROL_ON_DATA); + + return 0; +} + +static const struct backlight_ops oaktrail_bl_ops = { + .get_brightness = get_backlight_brightness, + .update_status = set_backlight_brightness, +}; + +static int oaktrail_backlight_init(void) +{ + struct backlight_device *bd; + struct backlight_properties props; + + memset(&props, 0, sizeof(struct backlight_properties)); + props.max_brightness = OT_EC_BL_BRIGHTNESS_MAX; + bd = backlight_device_register(DRIVER_NAME, + &oaktrail_device->dev, NULL, + &oaktrail_bl_ops, + &props); + + if (IS_ERR(bd)) { + oaktrail_bl_device = NULL; + pr_warning("Unable to register backlight device\n"); + return PTR_ERR(bd); + } + + oaktrail_bl_device = bd; + + 
bd->props.brightness = get_backlight_brightness(bd); + bd->props.power = FB_BLANK_UNBLANK; + backlight_update_status(bd); + + return 0; +} + +static void oaktrail_backlight_exit(void) +{ + if (oaktrail_bl_device) + backlight_device_unregister(oaktrail_bl_device); +} + +static int __devinit oaktrail_probe(struct platform_device *pdev) +{ + return 0; +} + +static int __devexit oaktrail_remove(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver oaktrail_driver = { + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, + .probe = oaktrail_probe, + .remove = __devexit_p(oaktrail_remove) +}; + +static int dmi_check_cb(const struct dmi_system_id *id) +{ + pr_info("Identified model '%s'\n", id->ident); + return 0; +} + +static struct dmi_system_id __initdata oaktrail_dmi_table[] = { + { + .ident = "OakTrail platform", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "OakTrail platform"), + }, + .callback = dmi_check_cb + }, + { } +}; + +static int __init oaktrail_init(void) +{ + int ret; + + if (acpi_disabled) { + pr_err("ACPI needs to be enabled for this driver to work!\n"); + return -ENODEV; + } + + if (!force && !dmi_check_system(oaktrail_dmi_table)) { + pr_err("Platform not recognized (You could try the module's force-parameter)"); + return -ENODEV; + } + + ret = platform_driver_register(&oaktrail_driver); + if (ret) { + pr_warning("Unable to register platform driver\n"); + goto err_driver_reg; + } + + oaktrail_device = platform_device_alloc(DRIVER_NAME, -1); + if (!oaktrail_device) { + pr_warning("Unable to allocate platform device\n"); + ret = -ENOMEM; + goto err_device_alloc; + } + + ret = platform_device_add(oaktrail_device); + if (ret) { + pr_warning("Unable to add platform device\n"); + goto err_device_add; + } + + if (!acpi_video_backlight_support()) { + ret = oaktrail_backlight_init(); + if (ret) + goto err_backlight; + + } else + pr_info("Backlight controlled by ACPI video driver\n"); + + ret = oaktrail_rfkill_init(); + if 
(ret) { + pr_warning("Setup rfkill failed\n"); + goto err_rfkill; + } + + pr_info("Driver "DRIVER_VERSION" successfully loaded\n"); + return 0; + +err_rfkill: + oaktrail_backlight_exit(); +err_backlight: + platform_device_del(oaktrail_device); +err_device_add: + platform_device_put(oaktrail_device); +err_device_alloc: + platform_driver_unregister(&oaktrail_driver); +err_driver_reg: + + return ret; +} + +static void __exit oaktrail_cleanup(void) +{ + oaktrail_backlight_exit(); + oaktrail_rfkill_cleanup(); + platform_device_unregister(oaktrail_device); + platform_driver_unregister(&oaktrail_driver); + + pr_info("Driver unloaded\n"); +} + +module_init(oaktrail_init); +module_exit(oaktrail_cleanup); + +MODULE_AUTHOR("Yin Kangkai (kangkai.yin@intel.com)"); +MODULE_DESCRIPTION("Intel Oaktrail Platform ACPI Extras"); +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("dmi:*:svnIntelCorporation:pnOakTrailplatform:*"); diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c index 464bb3fc4d8..1686c1e07d5 100644 --- a/drivers/platform/x86/intel_pmic_gpio.c +++ b/drivers/platform/x86/intel_pmic_gpio.c @@ -19,6 +19,8 @@ * Moorestown platform PMIC chip */ +#define pr_fmt(fmt) "%s: " fmt, __func__ + #include <linux/module.h> #include <linux/kernel.h> #include <linux/interrupt.h> @@ -90,8 +92,7 @@ static void pmic_program_irqtype(int gpio, int type) static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned offset) { if (offset > 8) { - printk(KERN_ERR - "%s: only pin 0-7 support input\n", __func__); + pr_err("only pin 0-7 support input\n"); return -1;/* we only have 8 GPIO can use as input */ } return intel_scu_ipc_update_register(GPIO0 + offset, @@ -116,8 +117,7 @@ static int pmic_gpio_direction_output(struct gpio_chip *chip, value ? 
1 << (offset - 16) : 0, 1 << (offset - 16)); else { - printk(KERN_ERR - "%s: invalid PMIC GPIO pin %d!\n", __func__, offset); + pr_err("invalid PMIC GPIO pin %d!\n", offset); WARN_ON(1); } @@ -260,7 +260,7 @@ static int __devinit platform_pmic_gpio_probe(struct platform_device *pdev) /* setting up SRAM mapping for GPIOINT register */ pg->gpiointr = ioremap_nocache(pdata->gpiointr, 8); if (!pg->gpiointr) { - printk(KERN_ERR "%s: Can not map GPIOINT.\n", __func__); + pr_err("Can not map GPIOINT\n"); retval = -EINVAL; goto err2; } @@ -281,13 +281,13 @@ static int __devinit platform_pmic_gpio_probe(struct platform_device *pdev) pg->chip.dev = dev; retval = gpiochip_add(&pg->chip); if (retval) { - printk(KERN_ERR "%s: Can not add pmic gpio chip.\n", __func__); + pr_err("Can not add pmic gpio chip\n"); goto err; } retval = request_irq(pg->irq, pmic_irq_handler, 0, "pmic", pg); if (retval) { - printk(KERN_WARNING "pmic: Interrupt request failed\n"); + pr_warn("Interrupt request failed\n"); goto err; } diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c index 23fb2afda00..3ff629df9f0 100644 --- a/drivers/platform/x86/msi-laptop.c +++ b/drivers/platform/x86/msi-laptop.c @@ -135,7 +135,7 @@ static int set_lcd_level(int level) buf[1] = (u8) (level*31); return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, buf, sizeof(buf), - NULL, 0, 1); + NULL, 0); } static int get_lcd_level(void) @@ -144,7 +144,7 @@ static int get_lcd_level(void) int result; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -157,7 +157,7 @@ static int get_auto_brightness(void) int result; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, &wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ -172,7 +172,7 @@ static int set_auto_brightness(int enable) wdata[0] = 4; result = ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 1, - &rdata, 1, 1); + &rdata, 1); if (result < 0) return result; @@ 
-180,7 +180,7 @@ static int set_auto_brightness(int enable) wdata[1] = (rdata & 0xF7) | (enable ? 8 : 0); return ec_transaction(MSI_EC_COMMAND_LCD_LEVEL, wdata, 2, - NULL, 0, 1); + NULL, 0); } static ssize_t set_device_state(const char *buf, size_t count, u8 mask) @@ -217,7 +217,7 @@ static int get_wireless_state(int *wlan, int *bluetooth) u8 wdata = 0, rdata; int result; - result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1, 1); + result = ec_transaction(MSI_EC_COMMAND_WIRELESS, &wdata, 1, &rdata, 1); if (result < 0) return -1; @@ -447,7 +447,7 @@ static struct platform_device *msipf_device; static int dmi_check_cb(const struct dmi_system_id *id) { - pr_info("Identified laptop model '%s'.\n", id->ident); + pr_info("Identified laptop model '%s'\n", id->ident); return 1; } @@ -800,7 +800,7 @@ static void msi_laptop_input_destroy(void) input_unregister_device(msi_laptop_input_dev); } -static int load_scm_model_init(struct platform_device *sdev) +static int __init load_scm_model_init(struct platform_device *sdev) { u8 data; int result; @@ -875,8 +875,7 @@ static int __init msi_init(void) /* Register backlight stuff */ if (acpi_video_backlight_support()) { - pr_info("Brightness ignored, must be controlled " - "by ACPI video driver\n"); + pr_info("Brightness ignored, must be controlled by ACPI video driver\n"); } else { struct backlight_properties props; memset(&props, 0, sizeof(struct backlight_properties)); @@ -930,7 +929,7 @@ static int __init msi_init(void) if (auto_brightness != 2) set_auto_brightness(auto_brightness); - pr_info("driver "MSI_DRIVER_VERSION" successfully loaded.\n"); + pr_info("driver " MSI_DRIVER_VERSION " successfully loaded\n"); return 0; @@ -978,7 +977,7 @@ static void __exit msi_cleanup(void) if (auto_brightness != 2) set_auto_brightness(1); - pr_info("driver unloaded.\n"); + pr_info("driver unloaded\n"); } module_init(msi_init); diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c index 
d5419c9ec07..c832e3356cd 100644 --- a/drivers/platform/x86/msi-wmi.c +++ b/drivers/platform/x86/msi-wmi.c @@ -20,6 +20,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/input.h> @@ -36,13 +37,10 @@ MODULE_ALIAS("wmi:551A1F84-FBDD-4125-91DB-3EA8F44F1D45"); MODULE_ALIAS("wmi:B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2"); #define DRV_NAME "msi-wmi" -#define DRV_PFX DRV_NAME ": " #define MSIWMI_BIOS_GUID "551A1F84-FBDD-4125-91DB-3EA8F44F1D45" #define MSIWMI_EVENT_GUID "B6F3EEF2-3D2F-49DC-9DE3-85BCE18C62F2" -#define dprintk(msg...) pr_debug(DRV_PFX msg) - #define SCANCODE_BASE 0xD0 #define MSI_WMI_BRIGHTNESSUP SCANCODE_BASE #define MSI_WMI_BRIGHTNESSDOWN (SCANCODE_BASE + 1) @@ -78,7 +76,7 @@ static int msi_wmi_query_block(int instance, int *ret) if (!obj || obj->type != ACPI_TYPE_INTEGER) { if (obj) { - printk(KERN_ERR DRV_PFX "query block returned object " + pr_err("query block returned object " "type: %d - buffer length:%d\n", obj->type, obj->type == ACPI_TYPE_BUFFER ? 
obj->buffer.length : 0); @@ -97,8 +95,8 @@ static int msi_wmi_set_block(int instance, int value) struct acpi_buffer input = { sizeof(int), &value }; - dprintk("Going to set block of instance: %d - value: %d\n", - instance, value); + pr_debug("Going to set block of instance: %d - value: %d\n", + instance, value); status = wmi_set_block(MSIWMI_BIOS_GUID, instance, &input); @@ -112,20 +110,19 @@ static int bl_get(struct backlight_device *bd) /* Instance 1 is "get backlight", cmp with DSDT */ err = msi_wmi_query_block(1, &ret); if (err) { - printk(KERN_ERR DRV_PFX "Could not query backlight: %d\n", err); + pr_err("Could not query backlight: %d\n", err); return -EINVAL; } - dprintk("Get: Query block returned: %d\n", ret); + pr_debug("Get: Query block returned: %d\n", ret); for (level = 0; level < ARRAY_SIZE(backlight_map); level++) { if (backlight_map[level] == ret) { - dprintk("Current backlight level: 0x%X - index: %d\n", - backlight_map[level], level); + pr_debug("Current backlight level: 0x%X - index: %d\n", + backlight_map[level], level); break; } } if (level == ARRAY_SIZE(backlight_map)) { - printk(KERN_ERR DRV_PFX "get: Invalid brightness value: 0x%X\n", - ret); + pr_err("get: Invalid brightness value: 0x%X\n", ret); return -EINVAL; } return level; @@ -156,7 +153,7 @@ static void msi_wmi_notify(u32 value, void *context) status = wmi_get_event_data(value, &response); if (status != AE_OK) { - printk(KERN_INFO DRV_PFX "bad event status 0x%x\n", status); + pr_info("bad event status 0x%x\n", status); return; } @@ -164,7 +161,7 @@ static void msi_wmi_notify(u32 value, void *context) if (obj && obj->type == ACPI_TYPE_INTEGER) { int eventcode = obj->integer.value; - dprintk("Eventcode: 0x%x\n", eventcode); + pr_debug("Eventcode: 0x%x\n", eventcode); key = sparse_keymap_entry_from_scancode(msi_wmi_input_dev, eventcode); if (key) { @@ -175,8 +172,8 @@ static void msi_wmi_notify(u32 value, void *context) /* Ignore event if the same event happened in a 50 ms timeframe -> Key 
press may result in 10-20 GPEs */ if (ktime_to_us(diff) < 1000 * 50) { - dprintk("Suppressed key event 0x%X - " - "Last press was %lld us ago\n", + pr_debug("Suppressed key event 0x%X - " + "Last press was %lld us ago\n", key->code, ktime_to_us(diff)); return; } @@ -187,17 +184,16 @@ static void msi_wmi_notify(u32 value, void *context) (!acpi_video_backlight_support() || (key->code != MSI_WMI_BRIGHTNESSUP && key->code != MSI_WMI_BRIGHTNESSDOWN))) { - dprintk("Send key: 0x%X - " - "Input layer keycode: %d\n", key->code, - key->keycode); + pr_debug("Send key: 0x%X - " + "Input layer keycode: %d\n", + key->code, key->keycode); sparse_keymap_report_entry(msi_wmi_input_dev, key, 1, true); } } else - printk(KERN_INFO "Unknown key pressed - %x\n", - eventcode); + pr_info("Unknown key pressed - %x\n", eventcode); } else - printk(KERN_INFO DRV_PFX "Unknown event received\n"); + pr_info("Unknown event received\n"); kfree(response.pointer); } @@ -238,8 +234,7 @@ static int __init msi_wmi_init(void) int err; if (!wmi_has_guid(MSIWMI_EVENT_GUID)) { - printk(KERN_ERR - "This machine doesn't have MSI-hotkeys through WMI\n"); + pr_err("This machine doesn't have MSI-hotkeys through WMI\n"); return -ENODEV; } err = wmi_install_notify_handler(MSIWMI_EVENT_GUID, @@ -270,7 +265,7 @@ static int __init msi_wmi_init(void) backlight->props.brightness = err; } - dprintk("Event handler installed\n"); + pr_debug("Event handler installed\n"); return 0; diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 6fe8cd6e23b..bbd182e178c 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -42,6 +42,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/moduleparam.h> @@ -70,10 +72,10 @@ #include <linux/miscdevice.h> #endif -#define DRV_PFX "sony-laptop: " -#define dprintk(msg...) do { \ - if (debug) \ - pr_warn(DRV_PFX msg); \ +#define dprintk(fmt, ...) 
\ +do { \ + if (debug) \ + pr_warn(fmt, ##__VA_ARGS__); \ } while (0) #define SONY_LAPTOP_DRIVER_VERSION "0.6" @@ -418,7 +420,7 @@ static int sony_laptop_setup_input(struct acpi_device *acpi_device) error = kfifo_alloc(&sony_laptop_input.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL); if (error) { - pr_err(DRV_PFX "kfifo_alloc failed\n"); + pr_err("kfifo_alloc failed\n"); goto err_dec_users; } @@ -702,7 +704,7 @@ static int acpi_callgetfunc(acpi_handle handle, char *name, int *result) return 0; } - pr_warn(DRV_PFX "acpi_callreadfunc failed\n"); + pr_warn("acpi_callreadfunc failed\n"); return -1; } @@ -728,8 +730,7 @@ static int acpi_callsetfunc(acpi_handle handle, char *name, int value, if (status == AE_OK) { if (result != NULL) { if (out_obj.type != ACPI_TYPE_INTEGER) { - pr_warn(DRV_PFX "acpi_evaluate_object bad " - "return type\n"); + pr_warn("acpi_evaluate_object bad return type\n"); return -1; } *result = out_obj.integer.value; @@ -737,7 +738,7 @@ static int acpi_callsetfunc(acpi_handle handle, char *name, int value, return 0; } - pr_warn(DRV_PFX "acpi_evaluate_object failed\n"); + pr_warn("acpi_evaluate_object failed\n"); return -1; } @@ -961,7 +962,6 @@ static int sony_backlight_get_brightness(struct backlight_device *bd) static int sony_nc_get_brightness_ng(struct backlight_device *bd) { int result; - int *handle = (int *)bl_get_data(bd); struct sony_backlight_props *sdev = (struct sony_backlight_props *)bl_get_data(bd); @@ -973,7 +973,6 @@ static int sony_nc_get_brightness_ng(struct backlight_device *bd) static int sony_nc_update_status_ng(struct backlight_device *bd) { int value, result; - int *handle = (int *)bl_get_data(bd); struct sony_backlight_props *sdev = (struct sony_backlight_props *)bl_get_data(bd); @@ -1104,10 +1103,8 @@ static void sony_nc_notify(struct acpi_device *device, u32 event) } if (!key_event->data) - pr_info(DRV_PFX - "Unknown event: 0x%x 0x%x\n", - key_handle, - ev); + pr_info("Unknown event: 0x%x 0x%x\n", + key_handle, ev); else 
sony_laptop_report_input_event(ev); } @@ -1128,7 +1125,7 @@ static acpi_status sony_walk_callback(acpi_handle handle, u32 level, struct acpi_device_info *info; if (ACPI_SUCCESS(acpi_get_object_info(handle, &info))) { - pr_warn(DRV_PFX "method: name: %4.4s, args %X\n", + pr_warn("method: name: %4.4s, args %X\n", (char *)&info->name, info->param_count); kfree(info); @@ -1169,7 +1166,7 @@ static int sony_nc_resume(struct acpi_device *device) ret = acpi_callsetfunc(sony_nc_acpi_handle, *item->acpiset, item->value, NULL); if (ret < 0) { - pr_err(DRV_PFX "%s: %d\n", __func__, ret); + pr_err("%s: %d\n", __func__, ret); break; } } @@ -1336,12 +1333,12 @@ static void sony_nc_rfkill_setup(struct acpi_device *device) device_enum = (union acpi_object *) buffer.pointer; if (!device_enum) { - pr_err(DRV_PFX "No SN06 return object."); + pr_err("No SN06 return object\n"); goto out_no_enum; } if (device_enum->type != ACPI_TYPE_BUFFER) { - pr_err(DRV_PFX "Invalid SN06 return object 0x%.2x\n", - device_enum->type); + pr_err("Invalid SN06 return object 0x%.2x\n", + device_enum->type); goto out_no_enum; } @@ -1662,7 +1659,7 @@ static void sony_nc_backlight_setup(void) ops, &props); if (IS_ERR(sony_bl_props.dev)) { - pr_warn(DRV_PFX "unable to register backlight device\n"); + pr_warn("unable to register backlight device\n"); sony_bl_props.dev = NULL; } else sony_bl_props.dev->props.brightness = @@ -1682,8 +1679,7 @@ static int sony_nc_add(struct acpi_device *device) acpi_handle handle; struct sony_nc_value *item; - pr_info(DRV_PFX "%s v%s.\n", SONY_NC_DRIVER_NAME, - SONY_LAPTOP_DRIVER_VERSION); + pr_info("%s v%s\n", SONY_NC_DRIVER_NAME, SONY_LAPTOP_DRIVER_VERSION); sony_nc_acpi_device = device; strcpy(acpi_device_class(device), "sony/hotkey"); @@ -1708,7 +1704,7 @@ static int sony_nc_add(struct acpi_device *device) sony_nc_acpi_handle, 1, sony_walk_callback, NULL, NULL, NULL); if (ACPI_FAILURE(status)) { - pr_warn(DRV_PFX "unable to walk acpi resources\n"); + pr_warn("unable to walk 
acpi resources\n"); result = -ENODEV; goto outpresent; } @@ -1736,13 +1732,12 @@ static int sony_nc_add(struct acpi_device *device) /* setup input devices and helper fifo */ result = sony_laptop_setup_input(device); if (result) { - pr_err(DRV_PFX "Unable to create input devices.\n"); + pr_err("Unable to create input devices\n"); goto outkbdbacklight; } if (acpi_video_backlight_support()) { - pr_info(DRV_PFX "brightness ignored, must be " - "controlled by ACPI video driver\n"); + pr_info("brightness ignored, must be controlled by ACPI video driver\n"); } else { sony_nc_backlight_setup(); } @@ -2265,9 +2260,9 @@ out: if (pcidev) pci_dev_put(pcidev); - pr_info(DRV_PFX "detected Type%d model\n", - dev->model == SONYPI_DEVICE_TYPE1 ? 1 : - dev->model == SONYPI_DEVICE_TYPE2 ? 2 : 3); + pr_info("detected Type%d model\n", + dev->model == SONYPI_DEVICE_TYPE1 ? 1 : + dev->model == SONYPI_DEVICE_TYPE2 ? 2 : 3); } /* camera tests and poweron/poweroff */ @@ -2313,7 +2308,7 @@ static int __sony_pic_camera_ready(void) static int __sony_pic_camera_off(void) { if (!camera) { - pr_warn(DRV_PFX "camera control not enabled\n"); + pr_warn("camera control not enabled\n"); return -ENODEV; } @@ -2333,7 +2328,7 @@ static int __sony_pic_camera_on(void) int i, j, x; if (!camera) { - pr_warn(DRV_PFX "camera control not enabled\n"); + pr_warn("camera control not enabled\n"); return -ENODEV; } @@ -2356,7 +2351,7 @@ static int __sony_pic_camera_on(void) } if (j == 0) { - pr_warn(DRV_PFX "failed to power on camera\n"); + pr_warn("failed to power on camera\n"); return -ENODEV; } @@ -2412,8 +2407,7 @@ int sony_pic_camera_command(int command, u8 value) ITERATIONS_SHORT); break; default: - pr_err(DRV_PFX "sony_pic_camera_command invalid: %d\n", - command); + pr_err("sony_pic_camera_command invalid: %d\n", command); break; } mutex_unlock(&spic_dev.lock); @@ -2819,7 +2813,7 @@ static int sonypi_compat_init(void) error = kfifo_alloc(&sonypi_compat.fifo, SONY_LAPTOP_BUF_SIZE, GFP_KERNEL); if (error) { - 
pr_err(DRV_PFX "kfifo_alloc failed\n"); + pr_err("kfifo_alloc failed\n"); return error; } @@ -2829,12 +2823,12 @@ static int sonypi_compat_init(void) sonypi_misc_device.minor = minor; error = misc_register(&sonypi_misc_device); if (error) { - pr_err(DRV_PFX "misc_register failed\n"); + pr_err("misc_register failed\n"); goto err_free_kfifo; } if (minor == -1) - pr_info(DRV_PFX "device allocated minor is %d\n", - sonypi_misc_device.minor); + pr_info("device allocated minor is %d\n", + sonypi_misc_device.minor); return 0; @@ -2893,8 +2887,8 @@ sony_pic_read_possible_resource(struct acpi_resource *resource, void *context) } for (i = 0; i < p->interrupt_count; i++) { if (!p->interrupts[i]) { - pr_warn(DRV_PFX "Invalid IRQ %d\n", - p->interrupts[i]); + pr_warn("Invalid IRQ %d\n", + p->interrupts[i]); continue; } interrupt = kzalloc(sizeof(*interrupt), @@ -2932,14 +2926,14 @@ sony_pic_read_possible_resource(struct acpi_resource *resource, void *context) ioport->io2.address_length); } else { - pr_err(DRV_PFX "Unknown SPIC Type, more than 2 IO Ports\n"); + pr_err("Unknown SPIC Type, more than 2 IO Ports\n"); return AE_ERROR; } return AE_OK; } default: dprintk("Resource %d isn't an IRQ nor an IO port\n", - resource->type); + resource->type); case ACPI_RESOURCE_TYPE_END_TAG: return AE_OK; @@ -2960,7 +2954,7 @@ static int sony_pic_possible_resources(struct acpi_device *device) dprintk("Evaluating _STA\n"); result = acpi_bus_get_status(device); if (result) { - pr_warn(DRV_PFX "Unable to read status\n"); + pr_warn("Unable to read status\n"); goto end; } @@ -2976,8 +2970,7 @@ static int sony_pic_possible_resources(struct acpi_device *device) status = acpi_walk_resources(device->handle, METHOD_NAME__PRS, sony_pic_read_possible_resource, &spic_dev); if (ACPI_FAILURE(status)) { - pr_warn(DRV_PFX "Failure evaluating %s\n", - METHOD_NAME__PRS); + pr_warn("Failure evaluating %s\n", METHOD_NAME__PRS); result = -ENODEV; } end: @@ -3090,7 +3083,7 @@ static int sony_pic_enable(struct 
acpi_device *device, /* check for total failure */ if (ACPI_FAILURE(status)) { - pr_err(DRV_PFX "Error evaluating _SRS\n"); + pr_err("Error evaluating _SRS\n"); result = -ENODEV; goto end; } @@ -3182,7 +3175,7 @@ static int sony_pic_remove(struct acpi_device *device, int type) struct sony_pic_irq *irq, *tmp_irq; if (sony_pic_disable(device)) { - pr_err(DRV_PFX "Couldn't disable device.\n"); + pr_err("Couldn't disable device\n"); return -ENXIO; } @@ -3222,8 +3215,7 @@ static int sony_pic_add(struct acpi_device *device) struct sony_pic_ioport *io, *tmp_io; struct sony_pic_irq *irq, *tmp_irq; - pr_info(DRV_PFX "%s v%s.\n", SONY_PIC_DRIVER_NAME, - SONY_LAPTOP_DRIVER_VERSION); + pr_info("%s v%s\n", SONY_PIC_DRIVER_NAME, SONY_LAPTOP_DRIVER_VERSION); spic_dev.acpi_dev = device; strcpy(acpi_device_class(device), "sony/hotkey"); @@ -3233,14 +3225,14 @@ static int sony_pic_add(struct acpi_device *device) /* read _PRS resources */ result = sony_pic_possible_resources(device); if (result) { - pr_err(DRV_PFX "Unable to read possible resources.\n"); + pr_err("Unable to read possible resources\n"); goto err_free_resources; } /* setup input devices and helper fifo */ result = sony_laptop_setup_input(device); if (result) { - pr_err(DRV_PFX "Unable to create input devices.\n"); + pr_err("Unable to create input devices\n"); goto err_free_resources; } @@ -3281,7 +3273,7 @@ static int sony_pic_add(struct acpi_device *device) } } if (!spic_dev.cur_ioport) { - pr_err(DRV_PFX "Failed to request_region.\n"); + pr_err("Failed to request_region\n"); result = -ENODEV; goto err_remove_compat; } @@ -3301,7 +3293,7 @@ static int sony_pic_add(struct acpi_device *device) } } if (!spic_dev.cur_irq) { - pr_err(DRV_PFX "Failed to request_irq.\n"); + pr_err("Failed to request_irq\n"); result = -ENODEV; goto err_release_region; } @@ -3309,7 +3301,7 @@ static int sony_pic_add(struct acpi_device *device) /* set resource status _SRS */ result = sony_pic_enable(device, spic_dev.cur_ioport, 
spic_dev.cur_irq); if (result) { - pr_err(DRV_PFX "Couldn't enable device.\n"); + pr_err("Couldn't enable device\n"); goto err_free_irq; } @@ -3418,7 +3410,7 @@ static int __init sony_laptop_init(void) if (!no_spic && dmi_check_system(sonypi_dmi_table)) { result = acpi_bus_register_driver(&sony_pic_driver); if (result) { - pr_err(DRV_PFX "Unable to register SPIC driver."); + pr_err("Unable to register SPIC driver\n"); goto out; } spic_drv_registered = 1; @@ -3426,7 +3418,7 @@ static int __init sony_laptop_init(void) result = acpi_bus_register_driver(&sony_nc_driver); if (result) { - pr_err(DRV_PFX "Unable to register SNC driver."); + pr_err("Unable to register SNC driver\n"); goto out_unregister_pic; } diff --git a/drivers/platform/x86/tc1100-wmi.c b/drivers/platform/x86/tc1100-wmi.c index 865ef78d6f1..e24f5ae475a 100644 --- a/drivers/platform/x86/tc1100-wmi.c +++ b/drivers/platform/x86/tc1100-wmi.c @@ -25,6 +25,8 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> @@ -40,9 +42,6 @@ #define TC1100_INSTANCE_WIRELESS 1 #define TC1100_INSTANCE_JOGDIAL 2 -#define TC1100_LOGPREFIX "tc1100-wmi: " -#define TC1100_INFO KERN_INFO TC1100_LOGPREFIX - MODULE_AUTHOR("Jamey Hicks, Carlos Corbacho"); MODULE_DESCRIPTION("HP Compaq TC1100 Tablet WMI Extras"); MODULE_LICENSE("GPL"); @@ -264,7 +263,7 @@ static int __init tc1100_init(void) if (error) goto err_device_del; - printk(TC1100_INFO "HP Compaq TC1100 Tablet WMI Extras loaded\n"); + pr_info("HP Compaq TC1100 Tablet WMI Extras loaded\n"); return 0; err_device_del: diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 562fcf0dd2b..77f6e707a2a 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -21,6 +21,8 @@ * 02110-1301, USA. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #define TPACPI_VERSION "0.24" #define TPACPI_SYSFS_VERSION 0x020700 @@ -224,17 +226,6 @@ enum tpacpi_hkey_event_t { #define TPACPI_MAX_ACPI_ARGS 3 -/* printk headers */ -#define TPACPI_LOG TPACPI_FILE ": " -#define TPACPI_EMERG KERN_EMERG TPACPI_LOG -#define TPACPI_ALERT KERN_ALERT TPACPI_LOG -#define TPACPI_CRIT KERN_CRIT TPACPI_LOG -#define TPACPI_ERR KERN_ERR TPACPI_LOG -#define TPACPI_WARN KERN_WARNING TPACPI_LOG -#define TPACPI_NOTICE KERN_NOTICE TPACPI_LOG -#define TPACPI_INFO KERN_INFO TPACPI_LOG -#define TPACPI_DEBUG KERN_DEBUG TPACPI_LOG - /* Debugging printk groups */ #define TPACPI_DBG_ALL 0xffff #define TPACPI_DBG_DISCLOSETASK 0x8000 @@ -389,34 +380,36 @@ static int tpacpi_uwb_emulstate; * Debugging helpers */ -#define dbg_printk(a_dbg_level, format, arg...) \ - do { if (dbg_level & (a_dbg_level)) \ - printk(TPACPI_DEBUG "%s: " format, __func__ , ## arg); \ - } while (0) +#define dbg_printk(a_dbg_level, format, arg...) \ +do { \ + if (dbg_level & (a_dbg_level)) \ + printk(KERN_DEBUG pr_fmt("%s: " format), \ + __func__, ##arg); \ +} while (0) #ifdef CONFIG_THINKPAD_ACPI_DEBUG #define vdbg_printk dbg_printk static const char *str_supported(int is_supported); #else -#define vdbg_printk(a_dbg_level, format, arg...) \ - do { } while (0) +static inline const char *str_supported(int is_supported) { return ""; } +#define vdbg_printk(a_dbg_level, format, arg...) \ + no_printk(format, ##arg) #endif static void tpacpi_log_usertask(const char * const what) { - printk(TPACPI_DEBUG "%s: access by process with PID %d\n", - what, task_tgid_vnr(current)); + printk(KERN_DEBUG pr_fmt("%s: access by process with PID %d\n"), + what, task_tgid_vnr(current)); } -#define tpacpi_disclose_usertask(what, format, arg...) 
\ - do { \ - if (unlikely( \ - (dbg_level & TPACPI_DBG_DISCLOSETASK) && \ - (tpacpi_lifecycle == TPACPI_LIFE_RUNNING))) { \ - printk(TPACPI_DEBUG "%s: PID %d: " format, \ - what, task_tgid_vnr(current), ## arg); \ - } \ - } while (0) +#define tpacpi_disclose_usertask(what, format, arg...) \ +do { \ + if (unlikely((dbg_level & TPACPI_DBG_DISCLOSETASK) && \ + (tpacpi_lifecycle == TPACPI_LIFE_RUNNING))) { \ + printk(KERN_DEBUG pr_fmt("%s: PID %d: " format), \ + what, task_tgid_vnr(current), ## arg); \ + } \ +} while (0) /* * Quirk handling helpers @@ -535,15 +528,6 @@ TPACPI_HANDLE(hkey, ec, "\\_SB.HKEY", /* 600e/x, 770e, 770x */ "HKEY", /* all others */ ); /* 570 */ -TPACPI_HANDLE(vid, root, "\\_SB.PCI.AGP.VGA", /* 570 */ - "\\_SB.PCI0.AGP0.VID0", /* 600e/x, 770x */ - "\\_SB.PCI0.VID0", /* 770e */ - "\\_SB.PCI0.VID", /* A21e, G4x, R50e, X30, X40 */ - "\\_SB.PCI0.AGP.VGA", /* X100e and a few others */ - "\\_SB.PCI0.AGP.VID", /* all others */ - ); /* R30, R31 */ - - /************************************************************************* * ACPI helpers */ @@ -563,7 +547,7 @@ static int acpi_evalf(acpi_handle handle, int quiet; if (!*fmt) { - printk(TPACPI_ERR "acpi_evalf() called with empty format\n"); + pr_err("acpi_evalf() called with empty format\n"); return 0; } @@ -588,7 +572,7 @@ static int acpi_evalf(acpi_handle handle, break; /* add more types as needed */ default: - printk(TPACPI_ERR "acpi_evalf() called " + pr_err("acpi_evalf() called " "with invalid format character '%c'\n", c); va_end(ap); return 0; @@ -617,13 +601,13 @@ static int acpi_evalf(acpi_handle handle, break; /* add more types as needed */ default: - printk(TPACPI_ERR "acpi_evalf() called " + pr_err("acpi_evalf() called " "with invalid format character '%c'\n", res_type); return 0; } if (!success && !quiet) - printk(TPACPI_ERR "acpi_evalf(%s, %s, ...) failed: %s\n", + pr_err("acpi_evalf(%s, %s, ...) 
failed: %s\n", method, fmt0, acpi_format_exception(status)); return success; @@ -767,8 +751,7 @@ static int __init setup_acpi_notify(struct ibm_struct *ibm) rc = acpi_bus_get_device(*ibm->acpi->handle, &ibm->acpi->device); if (rc < 0) { - printk(TPACPI_ERR "acpi_bus_get_device(%s) failed: %d\n", - ibm->name, rc); + pr_err("acpi_bus_get_device(%s) failed: %d\n", ibm->name, rc); return -ENODEV; } @@ -781,12 +764,10 @@ static int __init setup_acpi_notify(struct ibm_struct *ibm) ibm->acpi->type, dispatch_acpi_notify, ibm); if (ACPI_FAILURE(status)) { if (status == AE_ALREADY_EXISTS) { - printk(TPACPI_NOTICE - "another device driver is already " - "handling %s events\n", ibm->name); + pr_notice("another device driver is already " + "handling %s events\n", ibm->name); } else { - printk(TPACPI_ERR - "acpi_install_notify_handler(%s) failed: %s\n", + pr_err("acpi_install_notify_handler(%s) failed: %s\n", ibm->name, acpi_format_exception(status)); } return -ENODEV; @@ -811,8 +792,7 @@ static int __init register_tpacpi_subdriver(struct ibm_struct *ibm) ibm->acpi->driver = kzalloc(sizeof(struct acpi_driver), GFP_KERNEL); if (!ibm->acpi->driver) { - printk(TPACPI_ERR - "failed to allocate memory for ibm->acpi->driver\n"); + pr_err("failed to allocate memory for ibm->acpi->driver\n"); return -ENOMEM; } @@ -823,7 +803,7 @@ static int __init register_tpacpi_subdriver(struct ibm_struct *ibm) rc = acpi_bus_register_driver(ibm->acpi->driver); if (rc < 0) { - printk(TPACPI_ERR "acpi_bus_register_driver(%s) failed: %d\n", + pr_err("acpi_bus_register_driver(%s) failed: %d\n", ibm->name, rc); kfree(ibm->acpi->driver); ibm->acpi->driver = NULL; @@ -1081,15 +1061,14 @@ static int parse_strtoul(const char *buf, static void tpacpi_disable_brightness_delay(void) { if (acpi_evalf(hkey_handle, NULL, "PWMS", "qvd", 0)) - printk(TPACPI_NOTICE - "ACPI backlight control delay disabled\n"); + pr_notice("ACPI backlight control delay disabled\n"); } static void printk_deprecated_attribute(const char * 
const what, const char * const details) { tpacpi_log_usertask("deprecated sysfs attribute"); - printk(TPACPI_WARN "WARNING: sysfs attribute %s is deprecated and " + pr_warn("WARNING: sysfs attribute %s is deprecated and " "will be removed. %s\n", what, details); } @@ -1264,8 +1243,7 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, &tpacpi_rfk_rfkill_ops, atp_rfk); if (!atp_rfk || !atp_rfk->rfkill) { - printk(TPACPI_ERR - "failed to allocate memory for rfkill class\n"); + pr_err("failed to allocate memory for rfkill class\n"); kfree(atp_rfk); return -ENOMEM; } @@ -1275,9 +1253,8 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, sw_status = (tp_rfkops->get_status)(); if (sw_status < 0) { - printk(TPACPI_ERR - "failed to read initial state for %s, error %d\n", - name, sw_status); + pr_err("failed to read initial state for %s, error %d\n", + name, sw_status); } else { sw_state = (sw_status == TPACPI_RFK_RADIO_OFF); if (set_default) { @@ -1291,9 +1268,7 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, res = rfkill_register(atp_rfk->rfkill); if (res < 0) { - printk(TPACPI_ERR - "failed to register %s rfkill switch: %d\n", - name, res); + pr_err("failed to register %s rfkill switch: %d\n", name, res); rfkill_destroy(atp_rfk->rfkill); kfree(atp_rfk); return res; @@ -1301,7 +1276,7 @@ static int __init tpacpi_new_rfkill(const enum tpacpi_rfk_id id, tpacpi_rfkill_switches[id] = atp_rfk; - printk(TPACPI_INFO "rfkill switch %s: radio is %sblocked\n", + pr_info("rfkill switch %s: radio is %sblocked\n", name, (sw_state || hw_state) ? "" : "un"); return 0; } @@ -1825,10 +1800,8 @@ static void __init tpacpi_check_outdated_fw(void) * broken, or really stable to begin with, so it is * best if the user upgrades the firmware anyway. 
*/ - printk(TPACPI_WARN - "WARNING: Outdated ThinkPad BIOS/EC firmware\n"); - printk(TPACPI_WARN - "WARNING: This firmware may be missing critical bug " + pr_warn("WARNING: Outdated ThinkPad BIOS/EC firmware\n"); + pr_warn("WARNING: This firmware may be missing critical bug " "fixes and/or important features\n"); } } @@ -2117,9 +2090,7 @@ void static hotkey_mask_warn_incomplete_mask(void) (hotkey_all_mask | TPACPI_HKEY_NVRAM_KNOWN_MASK); if (wantedmask) - printk(TPACPI_NOTICE - "required events 0x%08x not enabled!\n", - wantedmask); + pr_notice("required events 0x%08x not enabled!\n", wantedmask); } /* @@ -2157,10 +2128,9 @@ static int hotkey_mask_set(u32 mask) * a given event. */ if (!hotkey_mask_get() && !rc && (fwmask & ~hotkey_acpi_mask)) { - printk(TPACPI_NOTICE - "asked for hotkey mask 0x%08x, but " - "firmware forced it to 0x%08x\n", - fwmask, hotkey_acpi_mask); + pr_notice("asked for hotkey mask 0x%08x, but " + "firmware forced it to 0x%08x\n", + fwmask, hotkey_acpi_mask); } if (tpacpi_lifecycle != TPACPI_LIFE_EXITING) @@ -2184,13 +2154,11 @@ static int hotkey_user_mask_set(const u32 mask) (mask == 0xffff || mask == 0xffffff || mask == 0xffffffff)) { tp_warned.hotkey_mask_ff = 1; - printk(TPACPI_NOTICE - "setting the hotkey mask to 0x%08x is likely " - "not the best way to go about it\n", mask); - printk(TPACPI_NOTICE - "please consider using the driver defaults, " - "and refer to up-to-date thinkpad-acpi " - "documentation\n"); + pr_notice("setting the hotkey mask to 0x%08x is likely " + "not the best way to go about it\n", mask); + pr_notice("please consider using the driver defaults, " + "and refer to up-to-date thinkpad-acpi " + "documentation\n"); } /* Try to enable what the user asked for, plus whatever we need. 
@@ -2574,8 +2542,7 @@ static void hotkey_poll_setup(const bool may_warn) NULL, TPACPI_NVRAM_KTHREAD_NAME); if (IS_ERR(tpacpi_hotkey_task)) { tpacpi_hotkey_task = NULL; - printk(TPACPI_ERR - "could not create kernel thread " + pr_err("could not create kernel thread " "for hotkey polling\n"); } } @@ -2583,11 +2550,10 @@ static void hotkey_poll_setup(const bool may_warn) hotkey_poll_stop_sync(); if (may_warn && (poll_driver_mask || poll_user_mask) && hotkey_poll_freq == 0) { - printk(TPACPI_NOTICE - "hot keys 0x%08x and/or events 0x%08x " - "require polling, which is currently " - "disabled\n", - poll_user_mask, poll_driver_mask); + pr_notice("hot keys 0x%08x and/or events 0x%08x " + "require polling, which is currently " + "disabled\n", + poll_user_mask, poll_driver_mask); } } } @@ -2811,13 +2777,13 @@ static ssize_t hotkey_source_mask_store(struct device *dev, mutex_unlock(&hotkey_mutex); if (rc < 0) - printk(TPACPI_ERR "hotkey_source_mask: failed to update the" - "firmware event mask!\n"); + pr_err("hotkey_source_mask: " + "failed to update the firmware event mask!\n"); if (r_ev) - printk(TPACPI_NOTICE "hotkey_source_mask: " - "some important events were disabled: " - "0x%04x\n", r_ev); + pr_notice("hotkey_source_mask: " + "some important events were disabled: 0x%04x\n", + r_ev); tpacpi_disclose_usertask("hotkey_source_mask", "set to 0x%08lx\n", t); @@ -3048,8 +3014,7 @@ static void hotkey_exit(void) if (((tp_features.hotkey_mask && hotkey_mask_set(hotkey_orig_mask)) | hotkey_status_set(false)) != 0) - printk(TPACPI_ERR - "failed to restore hot key mask " + pr_err("failed to restore hot key mask " "to BIOS defaults\n"); } @@ -3288,10 +3253,9 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) for HKEY interface version 0x100 */ if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) { if ((hkeyv >> 8) != 1) { - printk(TPACPI_ERR "unknown version of the " - "HKEY interface: 0x%x\n", hkeyv); - printk(TPACPI_ERR "please report this to %s\n", - TPACPI_MAIL); + 
pr_err("unknown version of the HKEY interface: 0x%x\n", + hkeyv); + pr_err("please report this to %s\n", TPACPI_MAIL); } else { /* * MHKV 0x100 in A31, R40, R40e, @@ -3304,8 +3268,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) /* Paranoia check AND init hotkey_all_mask */ if (!acpi_evalf(hkey_handle, &hotkey_all_mask, "MHKA", "qd")) { - printk(TPACPI_ERR - "missing MHKA handler, " + pr_err("missing MHKA handler, " "please report this to %s\n", TPACPI_MAIL); /* Fallback: pre-init for FN+F3,F4,F12 */ @@ -3343,16 +3306,14 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) if (dbg_wlswemul) { tp_features.hotkey_wlsw = 1; radiosw_state = !!tpacpi_wlsw_emulstate; - printk(TPACPI_INFO - "radio switch emulation enabled\n"); + pr_info("radio switch emulation enabled\n"); } else #endif /* Not all thinkpads have a hardware radio switch */ if (acpi_evalf(hkey_handle, &status, "WLSW", "qd")) { tp_features.hotkey_wlsw = 1; radiosw_state = !!status; - printk(TPACPI_INFO - "radio switch found; radios are %s\n", + pr_info("radio switch found; radios are %s\n", enabled(status, 0)); } if (tp_features.hotkey_wlsw) @@ -3363,8 +3324,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) if (!res && acpi_evalf(hkey_handle, &status, "MHKG", "qd")) { tp_features.hotkey_tablet = 1; tabletsw_state = !!(status & TP_HOTKEY_TABLET_MASK); - printk(TPACPI_INFO - "possible tablet mode switch found; " + pr_info("possible tablet mode switch found; " "ThinkPad in %s mode\n", (tabletsw_state) ? 
"tablet" : "laptop"); res = add_to_attr_set(hotkey_dev_attributes, @@ -3382,8 +3342,7 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) hotkey_keycode_map = kmalloc(TPACPI_HOTKEY_MAP_SIZE, GFP_KERNEL); if (!hotkey_keycode_map) { - printk(TPACPI_ERR - "failed to allocate memory for key map\n"); + pr_err("failed to allocate memory for key map\n"); res = -ENOMEM; goto err_exit; } @@ -3426,13 +3385,11 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) * userspace. tpacpi_detect_brightness_capabilities() must have * been called before this point */ if (tp_features.bright_acpimode && acpi_video_backlight_support()) { - printk(TPACPI_INFO - "This ThinkPad has standard ACPI backlight " - "brightness control, supported by the ACPI " - "video driver\n"); - printk(TPACPI_NOTICE - "Disabling thinkpad-acpi brightness events " - "by default...\n"); + pr_info("This ThinkPad has standard ACPI backlight " + "brightness control, supported by the ACPI " + "video driver\n"); + pr_notice("Disabling thinkpad-acpi brightness events " + "by default...\n"); /* Disable brightness up/down on Lenovo thinkpads when * ACPI is handling them, otherwise it is plain impossible @@ -3539,8 +3496,7 @@ static bool hotkey_notify_wakeup(const u32 hkey, case TP_HKEY_EV_WKUP_S3_BATLOW: /* Battery on critical low level/S3 */ case TP_HKEY_EV_WKUP_S4_BATLOW: /* Battery on critical low level/S4 */ - printk(TPACPI_ALERT - "EMERGENCY WAKEUP: battery almost empty\n"); + pr_alert("EMERGENCY WAKEUP: battery almost empty\n"); /* how to auto-heal: */ /* 2313: woke up from S3, go to S4/S5 */ /* 2413: woke up from S4, go to S5 */ @@ -3551,9 +3507,7 @@ static bool hotkey_notify_wakeup(const u32 hkey, } if (hotkey_wakeup_reason != TP_ACPI_WAKEUP_NONE) { - printk(TPACPI_INFO - "woke up due to a hot-unplug " - "request...\n"); + pr_info("woke up due to a hot-unplug request...\n"); hotkey_wakeup_reason_notify_change(); } return true; @@ -3605,37 +3559,31 @@ static bool hotkey_notify_thermal(const 
u32 hkey, switch (hkey) { case TP_HKEY_EV_THM_TABLE_CHANGED: - printk(TPACPI_INFO - "EC reports that Thermal Table has changed\n"); + pr_info("EC reports that Thermal Table has changed\n"); /* recommended action: do nothing, we don't have * Lenovo ATM information */ return true; case TP_HKEY_EV_ALARM_BAT_HOT: - printk(TPACPI_CRIT - "THERMAL ALARM: battery is too hot!\n"); + pr_crit("THERMAL ALARM: battery is too hot!\n"); /* recommended action: warn user through gui */ break; case TP_HKEY_EV_ALARM_BAT_XHOT: - printk(TPACPI_ALERT - "THERMAL EMERGENCY: battery is extremely hot!\n"); + pr_alert("THERMAL EMERGENCY: battery is extremely hot!\n"); /* recommended action: immediate sleep/hibernate */ break; case TP_HKEY_EV_ALARM_SENSOR_HOT: - printk(TPACPI_CRIT - "THERMAL ALARM: " + pr_crit("THERMAL ALARM: " "a sensor reports something is too hot!\n"); /* recommended action: warn user through gui, that */ /* some internal component is too hot */ break; case TP_HKEY_EV_ALARM_SENSOR_XHOT: - printk(TPACPI_ALERT - "THERMAL EMERGENCY: " - "a sensor reports something is extremely hot!\n"); + pr_alert("THERMAL EMERGENCY: " + "a sensor reports something is extremely hot!\n"); /* recommended action: immediate sleep/hibernate */ break; default: - printk(TPACPI_ALERT - "THERMAL ALERT: unknown thermal alarm received\n"); + pr_alert("THERMAL ALERT: unknown thermal alarm received\n"); known = false; } @@ -3652,8 +3600,7 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) bool known_ev; if (event != 0x80) { - printk(TPACPI_ERR - "unknown HKEY notification event %d\n", event); + pr_err("unknown HKEY notification event %d\n", event); /* forward it to userspace, maybe it knows how to handle it */ acpi_bus_generate_netlink_event( ibm->acpi->device->pnp.device_class, @@ -3664,7 +3611,7 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) while (1) { if (!acpi_evalf(hkey_handle, &hkey, "MHKP", "d")) { - printk(TPACPI_ERR "failed to retrieve HKEY event\n"); + 
pr_err("failed to retrieve HKEY event\n"); return; } @@ -3692,8 +3639,7 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) switch (hkey) { case TP_HKEY_EV_BAYEJ_ACK: hotkey_autosleep_ack = 1; - printk(TPACPI_INFO - "bay ejected\n"); + pr_info("bay ejected\n"); hotkey_wakeup_hotunplug_complete_notify_change(); known_ev = true; break; @@ -3709,8 +3655,7 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) /* 0x4000-0x4FFF: dock-related wakeups */ if (hkey == TP_HKEY_EV_UNDOCK_ACK) { hotkey_autosleep_ack = 1; - printk(TPACPI_INFO - "undocked\n"); + pr_info("undocked\n"); hotkey_wakeup_hotunplug_complete_notify_change(); known_ev = true; } else { @@ -3741,11 +3686,9 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) known_ev = false; } if (!known_ev) { - printk(TPACPI_NOTICE - "unhandled HKEY event 0x%04x\n", hkey); - printk(TPACPI_NOTICE - "please report the conditions when this " - "event happened to %s\n", TPACPI_MAIL); + pr_notice("unhandled HKEY event 0x%04x\n", hkey); + pr_notice("please report the conditions when this " + "event happened to %s\n", TPACPI_MAIL); } /* Legacy events */ @@ -3778,8 +3721,7 @@ static void hotkey_resume(void) if (hotkey_status_set(true) < 0 || hotkey_mask_set(hotkey_acpi_mask) < 0) - printk(TPACPI_ERR - "error while attempting to reset the event " + pr_err("error while attempting to reset the event " "firmware interface\n"); tpacpi_send_radiosw_update(); @@ -3824,14 +3766,12 @@ static void hotkey_enabledisable_warn(bool enable) { tpacpi_log_usertask("procfs hotkey enable/disable"); if (!WARN((tpacpi_lifecycle == TPACPI_LIFE_RUNNING || !enable), - TPACPI_WARN - "hotkey enable/disable functionality has been " - "removed from the driver. Hotkeys are always " - "enabled\n")) - printk(TPACPI_ERR - "Please remove the hotkey=enable module " - "parameter, it is deprecated. Hotkeys are always " - "enabled\n"); + pr_fmt("hotkey enable/disable functionality has been " + "removed from the driver. 
" + "Hotkeys are always enabled.\n"))) + pr_err("Please remove the hotkey=enable module " + "parameter, it is deprecated. " + "Hotkeys are always enabled.\n"); } static int hotkey_write(char *buf) @@ -4011,8 +3951,7 @@ static void bluetooth_shutdown(void) /* Order firmware to save current state to NVRAM */ if (!acpi_evalf(NULL, NULL, "\\BLTH", "vd", TP_ACPI_BLTH_SAVE_STATE)) - printk(TPACPI_NOTICE - "failed to save bluetooth state to NVRAM\n"); + pr_notice("failed to save bluetooth state to NVRAM\n"); else vdbg_printk(TPACPI_DBG_RFKILL, "bluestooth state saved to NVRAM\n"); @@ -4051,8 +3990,7 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm) #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES if (dbg_bluetoothemul) { tp_features.bluetooth = 1; - printk(TPACPI_INFO - "bluetooth switch emulation enabled\n"); + pr_info("bluetooth switch emulation enabled\n"); } else #endif if (tp_features.bluetooth && @@ -4203,8 +4141,7 @@ static void wan_shutdown(void) /* Order firmware to save current state to NVRAM */ if (!acpi_evalf(NULL, NULL, "\\WGSV", "vd", TP_ACPI_WGSV_SAVE_STATE)) - printk(TPACPI_NOTICE - "failed to save WWAN state to NVRAM\n"); + pr_notice("failed to save WWAN state to NVRAM\n"); else vdbg_printk(TPACPI_DBG_RFKILL, "WWAN state saved to NVRAM\n"); @@ -4241,8 +4178,7 @@ static int __init wan_init(struct ibm_init_struct *iibm) #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES if (dbg_wwanemul) { tp_features.wan = 1; - printk(TPACPI_INFO - "wwan switch emulation enabled\n"); + pr_info("wwan switch emulation enabled\n"); } else #endif if (tp_features.wan && @@ -4382,8 +4318,7 @@ static int __init uwb_init(struct ibm_init_struct *iibm) #ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES if (dbg_uwbemul) { tp_features.uwb = 1; - printk(TPACPI_INFO - "uwb switch emulation enabled\n"); + pr_info("uwb switch emulation enabled\n"); } else #endif if (tp_features.uwb && @@ -4444,6 +4379,15 @@ static int video_orig_autosw; static int video_autosw_get(void); static int 
video_autosw_set(int enable); +TPACPI_HANDLE(vid, root, + "\\_SB.PCI.AGP.VGA", /* 570 */ + "\\_SB.PCI0.AGP0.VID0", /* 600e/x, 770x */ + "\\_SB.PCI0.VID0", /* 770e */ + "\\_SB.PCI0.VID", /* A21e, G4x, R50e, X30, X40 */ + "\\_SB.PCI0.AGP.VGA", /* X100e and a few others */ + "\\_SB.PCI0.AGP.VID", /* all others */ + ); /* R30, R31 */ + TPACPI_HANDLE(vid2, root, "\\_SB.PCI0.AGPB.VID"); /* G41 */ static int __init video_init(struct ibm_init_struct *iibm) @@ -4487,7 +4431,7 @@ static void video_exit(void) dbg_printk(TPACPI_DBG_EXIT, "restoring original video autoswitch mode\n"); if (video_autosw_set(video_orig_autosw)) - printk(TPACPI_ERR "error while trying to restore original " + pr_err("error while trying to restore original " "video autoswitch mode\n"); } @@ -4560,8 +4504,7 @@ static int video_outputsw_set(int status) res = acpi_evalf(vid_handle, NULL, "ASWT", "vdd", status * 0x100, 0); if (!autosw && video_autosw_set(autosw)) { - printk(TPACPI_ERR - "video auto-switch left enabled due to error\n"); + pr_err("video auto-switch left enabled due to error\n"); return -EIO; } break; @@ -4630,8 +4573,7 @@ static int video_outputsw_cycle(void) return -ENOSYS; } if (!autosw && video_autosw_set(autosw)) { - printk(TPACPI_ERR - "video auto-switch left enabled due to error\n"); + pr_err("video auto-switch left enabled due to error\n"); return -EIO; } @@ -5348,7 +5290,7 @@ static int __init led_init(struct ibm_init_struct *iibm) tpacpi_leds = kzalloc(sizeof(*tpacpi_leds) * TPACPI_LED_NUMLEDS, GFP_KERNEL); if (!tpacpi_leds) { - printk(TPACPI_ERR "Out of memory for LED data\n"); + pr_err("Out of memory for LED data\n"); return -ENOMEM; } @@ -5367,9 +5309,8 @@ static int __init led_init(struct ibm_init_struct *iibm) } #ifdef CONFIG_THINKPAD_ACPI_UNSAFE_LEDS - printk(TPACPI_NOTICE - "warning: userspace override of important " - "firmware LEDs is enabled\n"); + pr_notice("warning: userspace override of important " + "firmware LEDs is enabled\n"); #endif return 0; } @@ -5639,17 
+5580,16 @@ static void thermal_dump_all_sensors(void) if (n <= 0) return; - printk(TPACPI_NOTICE - "temperatures (Celsius):"); + pr_notice("temperatures (Celsius):"); for (i = 0; i < n; i++) { if (t.temp[i] != TPACPI_THERMAL_SENSOR_NA) - printk(KERN_CONT " %d", (int)(t.temp[i] / 1000)); + pr_cont(" %d", (int)(t.temp[i] / 1000)); else - printk(KERN_CONT " N/A"); + pr_cont(" N/A"); } - printk(KERN_CONT "\n"); + pr_cont("\n"); } /* sysfs temp##_input -------------------------------------------------- */ @@ -5769,14 +5709,12 @@ static int __init thermal_init(struct ibm_init_struct *iibm) if (ta1 == 0) { /* This is sheer paranoia, but we handle it anyway */ if (acpi_tmp7) { - printk(TPACPI_ERR - "ThinkPad ACPI EC access misbehaving, " + pr_err("ThinkPad ACPI EC access misbehaving, " "falling back to ACPI TMPx access " "mode\n"); thermal_read_mode = TPACPI_THERMAL_ACPI_TMP07; } else { - printk(TPACPI_ERR - "ThinkPad ACPI EC access misbehaving, " + pr_err("ThinkPad ACPI EC access misbehaving, " "disabling thermal sensors access\n"); thermal_read_mode = TPACPI_THERMAL_NONE; } @@ -6129,8 +6067,8 @@ static int __init tpacpi_query_bcl_levels(acpi_handle handle) if (ACPI_SUCCESS(acpi_evaluate_object(handle, "_BCL", NULL, &buffer))) { obj = (union acpi_object *)buffer.pointer; if (!obj || (obj->type != ACPI_TYPE_PACKAGE)) { - printk(TPACPI_ERR "Unknown _BCL data, " - "please report this to %s\n", TPACPI_MAIL); + pr_err("Unknown _BCL data, please report this to %s\n", + TPACPI_MAIL); rc = 0; } else { rc = obj->package.count; @@ -6214,18 +6152,15 @@ static void __init tpacpi_detect_brightness_capabilities(void) switch (b) { case 16: bright_maxlvl = 15; - printk(TPACPI_INFO - "detected a 16-level brightness capable ThinkPad\n"); + pr_info("detected a 16-level brightness capable ThinkPad\n"); break; case 8: case 0: bright_maxlvl = 7; - printk(TPACPI_INFO - "detected a 8-level brightness capable ThinkPad\n"); + pr_info("detected a 8-level brightness capable ThinkPad\n"); break; 
default: - printk(TPACPI_ERR - "Unsupported brightness interface, " + pr_err("Unsupported brightness interface, " "please contact %s\n", TPACPI_MAIL); tp_features.bright_unkfw = 1; bright_maxlvl = b - 1; @@ -6260,22 +6195,19 @@ static int __init brightness_init(struct ibm_init_struct *iibm) if (acpi_video_backlight_support()) { if (brightness_enable > 1) { - printk(TPACPI_INFO - "Standard ACPI backlight interface " - "available, not loading native one.\n"); + pr_info("Standard ACPI backlight interface " + "available, not loading native one\n"); return 1; } else if (brightness_enable == 1) { - printk(TPACPI_WARN - "Cannot enable backlight brightness support, " + pr_warn("Cannot enable backlight brightness support, " "ACPI is already handling it. Refer to the " - "acpi_backlight kernel parameter\n"); + "acpi_backlight kernel parameter.\n"); return 1; } } else if (tp_features.bright_acpimode && brightness_enable > 1) { - printk(TPACPI_NOTICE - "Standard ACPI backlight interface not " - "available, thinkpad_acpi native " - "brightness control enabled\n"); + pr_notice("Standard ACPI backlight interface not " + "available, thinkpad_acpi native " + "brightness control enabled\n"); } /* @@ -6319,19 +6251,17 @@ static int __init brightness_init(struct ibm_init_struct *iibm) if (IS_ERR(ibm_backlight_device)) { int rc = PTR_ERR(ibm_backlight_device); ibm_backlight_device = NULL; - printk(TPACPI_ERR "Could not register backlight device\n"); + pr_err("Could not register backlight device\n"); return rc; } vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_BRGHT, "brightness is supported\n"); if (quirks & TPACPI_BRGHT_Q_ASK) { - printk(TPACPI_NOTICE - "brightness: will use unverified default: " - "brightness_mode=%d\n", brightness_mode); - printk(TPACPI_NOTICE - "brightness: please report to %s whether it works well " - "or not on your ThinkPad\n", TPACPI_MAIL); + pr_notice("brightness: will use unverified default: " + "brightness_mode=%d\n", brightness_mode); + pr_notice("brightness: 
please report to %s whether it works well " + "or not on your ThinkPad\n", TPACPI_MAIL); } /* Added by mistake in early 2007. Probably useless, but it could @@ -6804,8 +6734,7 @@ static int __init volume_create_alsa_mixer(void) rc = snd_card_create(alsa_index, alsa_id, THIS_MODULE, sizeof(struct tpacpi_alsa_data), &card); if (rc < 0 || !card) { - printk(TPACPI_ERR - "Failed to create ALSA card structures: %d\n", rc); + pr_err("Failed to create ALSA card structures: %d\n", rc); return 1; } @@ -6839,9 +6768,8 @@ static int __init volume_create_alsa_mixer(void) ctl_vol = snd_ctl_new1(&volume_alsa_control_vol, NULL); rc = snd_ctl_add(card, ctl_vol); if (rc < 0) { - printk(TPACPI_ERR - "Failed to create ALSA volume control: %d\n", - rc); + pr_err("Failed to create ALSA volume control: %d\n", + rc); goto err_exit; } data->ctl_vol_id = &ctl_vol->id; @@ -6850,8 +6778,7 @@ static int __init volume_create_alsa_mixer(void) ctl_mute = snd_ctl_new1(&volume_alsa_control_mute, NULL); rc = snd_ctl_add(card, ctl_mute); if (rc < 0) { - printk(TPACPI_ERR "Failed to create ALSA mute control: %d\n", - rc); + pr_err("Failed to create ALSA mute control: %d\n", rc); goto err_exit; } data->ctl_mute_id = &ctl_mute->id; @@ -6859,7 +6786,7 @@ static int __init volume_create_alsa_mixer(void) snd_card_set_dev(card, &tpacpi_pdev->dev); rc = snd_card_register(card); if (rc < 0) { - printk(TPACPI_ERR "Failed to register ALSA card: %d\n", rc); + pr_err("Failed to register ALSA card: %d\n", rc); goto err_exit; } @@ -6915,9 +6842,8 @@ static int __init volume_init(struct ibm_init_struct *iibm) return -EINVAL; if (volume_mode == TPACPI_VOL_MODE_UCMS_STEP) { - printk(TPACPI_ERR - "UCMS step volume mode not implemented, " - "please contact %s\n", TPACPI_MAIL); + pr_err("UCMS step volume mode not implemented, " + "please contact %s\n", TPACPI_MAIL); return 1; } @@ -6981,13 +6907,11 @@ static int __init volume_init(struct ibm_init_struct *iibm) rc = volume_create_alsa_mixer(); if (rc) { - 
printk(TPACPI_ERR - "Could not create the ALSA mixer interface\n"); + pr_err("Could not create the ALSA mixer interface\n"); return rc; } - printk(TPACPI_INFO - "Console audio control enabled, mode: %s\n", + pr_info("Console audio control enabled, mode: %s\n", (volume_control_allowed) ? "override (read/write)" : "monitor (read only)"); @@ -7049,12 +6973,10 @@ static int volume_write(char *buf) if (!volume_control_allowed && tpacpi_lifecycle != TPACPI_LIFE_INIT) { if (unlikely(!tp_warned.volume_ctrl_forbidden)) { tp_warned.volume_ctrl_forbidden = 1; - printk(TPACPI_NOTICE - "Console audio control in monitor mode, " - "changes are not allowed.\n"); - printk(TPACPI_NOTICE - "Use the volume_control=1 module parameter " - "to enable volume control\n"); + pr_notice("Console audio control in monitor mode, " + "changes are not allowed\n"); + pr_notice("Use the volume_control=1 module parameter " + "to enable volume control\n"); } return -EPERM; } @@ -7129,8 +7051,7 @@ static void inline volume_alsa_notify_change(void) static int __init volume_init(struct ibm_init_struct *iibm) { - printk(TPACPI_INFO - "volume: disabled as there is no ALSA support in this kernel\n"); + pr_info("volume: disabled as there is no ALSA support in this kernel\n"); return 1; } @@ -7337,9 +7258,8 @@ TPACPI_HANDLE(sfan, ec, "SFAN", /* 570 */ static void fan_quirk1_setup(void) { if (fan_control_initial_status == 0x07) { - printk(TPACPI_NOTICE - "fan_init: initial fan status is unknown, " - "assuming it is in auto mode\n"); + pr_notice("fan_init: initial fan status is unknown, " + "assuming it is in auto mode\n"); tp_features.fan_ctrl_status_undef = 1; } } @@ -7726,8 +7646,7 @@ static void fan_watchdog_reset(void) if (!queue_delayed_work(tpacpi_wq, &fan_watchdog_task, msecs_to_jiffies(fan_watchdog_maxinterval * 1000))) { - printk(TPACPI_ERR - "failed to queue the fan watchdog, " + pr_err("failed to queue the fan watchdog, " "watchdog will not trigger\n"); } } else @@ -7741,11 +7660,11 @@ static void 
fan_watchdog_fire(struct work_struct *ignored) if (tpacpi_lifecycle != TPACPI_LIFE_RUNNING) return; - printk(TPACPI_NOTICE "fan watchdog: enabling fan\n"); + pr_notice("fan watchdog: enabling fan\n"); rc = fan_set_enable(); if (rc < 0) { - printk(TPACPI_ERR "fan watchdog: error %d while enabling fan, " - "will try again later...\n", -rc); + pr_err("fan watchdog: error %d while enabling fan, " + "will try again later...\n", -rc); /* reschedule for later */ fan_watchdog_reset(); } @@ -8049,8 +7968,7 @@ static int __init fan_init(struct ibm_init_struct *iibm) "secondary fan support enabled\n"); } } else { - printk(TPACPI_ERR - "ThinkPad ACPI EC access misbehaving, " + pr_err("ThinkPad ACPI EC access misbehaving, " "fan status and control unavailable\n"); return 1; } @@ -8150,9 +8068,8 @@ static void fan_suspend(pm_message_t state) fan_control_resume_level = 0; rc = fan_get_status_safe(&fan_control_resume_level); if (rc < 0) - printk(TPACPI_NOTICE - "failed to read fan level for later " - "restore during resume: %d\n", rc); + pr_notice("failed to read fan level for later " + "restore during resume: %d\n", rc); /* if it is undefined, don't attempt to restore it. 
* KEEP THIS LAST */ @@ -8207,13 +8124,11 @@ static void fan_resume(void) return; } if (do_set) { - printk(TPACPI_NOTICE - "restoring fan level to 0x%02x\n", - fan_control_resume_level); + pr_notice("restoring fan level to 0x%02x\n", + fan_control_resume_level); rc = fan_set_level_safe(fan_control_resume_level); if (rc < 0) - printk(TPACPI_NOTICE - "failed to restore fan level: %d\n", rc); + pr_notice("failed to restore fan level: %d\n", rc); } } @@ -8305,8 +8220,8 @@ static int fan_write_cmd_level(const char *cmd, int *rc) *rc = fan_set_level_safe(level); if (*rc == -ENXIO) - printk(TPACPI_ERR "level command accepted for unsupported " - "access mode %d", fan_control_access_mode); + pr_err("level command accepted for unsupported access mode %d\n", + fan_control_access_mode); else if (!*rc) tpacpi_disclose_usertask("procfs fan", "set level to %d\n", level); @@ -8321,8 +8236,8 @@ static int fan_write_cmd_enable(const char *cmd, int *rc) *rc = fan_set_enable(); if (*rc == -ENXIO) - printk(TPACPI_ERR "enable command accepted for unsupported " - "access mode %d", fan_control_access_mode); + pr_err("enable command accepted for unsupported access mode %d\n", + fan_control_access_mode); else if (!*rc) tpacpi_disclose_usertask("procfs fan", "enable\n"); @@ -8336,8 +8251,8 @@ static int fan_write_cmd_disable(const char *cmd, int *rc) *rc = fan_set_disable(); if (*rc == -ENXIO) - printk(TPACPI_ERR "disable command accepted for unsupported " - "access mode %d", fan_control_access_mode); + pr_err("disable command accepted for unsupported access mode %d\n", + fan_control_access_mode); else if (!*rc) tpacpi_disclose_usertask("procfs fan", "disable\n"); @@ -8356,8 +8271,8 @@ static int fan_write_cmd_speed(const char *cmd, int *rc) *rc = fan_set_speed(speed); if (*rc == -ENXIO) - printk(TPACPI_ERR "speed command accepted for unsupported " - "access mode %d", fan_control_access_mode); + pr_err("speed command accepted for unsupported access mode %d\n", + fan_control_access_mode); else 
if (!*rc) tpacpi_disclose_usertask("procfs fan", "set speed to %d\n", speed); @@ -8560,8 +8475,8 @@ static int __init ibm_init(struct ibm_init_struct *iibm) if (ibm->acpi->notify) { ret = setup_acpi_notify(ibm); if (ret == -ENODEV) { - printk(TPACPI_NOTICE "disabling subdriver %s\n", - ibm->name); + pr_notice("disabling subdriver %s\n", + ibm->name); ret = 0; goto err_out; } @@ -8583,8 +8498,7 @@ static int __init ibm_init(struct ibm_init_struct *iibm) entry = proc_create_data(ibm->name, mode, proc_dir, &dispatch_proc_fops, ibm); if (!entry) { - printk(TPACPI_ERR "unable to create proc entry %s\n", - ibm->name); + pr_err("unable to create proc entry %s\n", ibm->name); ret = -ENODEV; goto err_out; } @@ -8683,13 +8597,11 @@ static int __must_check __init get_thinkpad_model_data( tp->ec_release = (ec_fw_string[4] << 8) | ec_fw_string[5]; } else { - printk(TPACPI_NOTICE - "ThinkPad firmware release %s " - "doesn't match the known patterns\n", - ec_fw_string); - printk(TPACPI_NOTICE - "please report this to %s\n", - TPACPI_MAIL); + pr_notice("ThinkPad firmware release %s " + "doesn't match the known patterns\n", + ec_fw_string); + pr_notice("please report this to %s\n", + TPACPI_MAIL); } break; } @@ -8733,8 +8645,7 @@ static int __init probe_for_thinkpad(void) tpacpi_acpi_handle_locate("ec", TPACPI_ACPI_EC_HID, &ec_handle); if (!ec_handle) { if (is_thinkpad) - printk(TPACPI_ERR - "Not yet supported ThinkPad detected!\n"); + pr_err("Not yet supported ThinkPad detected!\n"); return -ENODEV; } @@ -8746,10 +8657,10 @@ static int __init probe_for_thinkpad(void) static void __init thinkpad_acpi_init_banner(void) { - printk(TPACPI_INFO "%s v%s\n", TPACPI_DESC, TPACPI_VERSION); - printk(TPACPI_INFO "%s\n", TPACPI_URL); + pr_info("%s v%s\n", TPACPI_DESC, TPACPI_VERSION); + pr_info("%s\n", TPACPI_URL); - printk(TPACPI_INFO "ThinkPad BIOS %s, EC %s\n", + pr_info("ThinkPad BIOS %s, EC %s\n", (thinkpad_id.bios_version_str) ? 
thinkpad_id.bios_version_str : "unknown", (thinkpad_id.ec_version_str) ? @@ -8758,7 +8669,7 @@ static void __init thinkpad_acpi_init_banner(void) BUG_ON(!thinkpad_id.vendor); if (thinkpad_id.model_str) - printk(TPACPI_INFO "%s %s, model %s\n", + pr_info("%s %s, model %s\n", (thinkpad_id.vendor == PCI_VENDOR_ID_IBM) ? "IBM" : ((thinkpad_id.vendor == PCI_VENDOR_ID_LENOVO) ? @@ -9024,8 +8935,7 @@ static int __init thinkpad_acpi_module_init(void) ret = get_thinkpad_model_data(&thinkpad_id); if (ret) { - printk(TPACPI_ERR - "unable to get DMI data: %d\n", ret); + pr_err("unable to get DMI data: %d\n", ret); thinkpad_acpi_module_exit(); return ret; } @@ -9051,16 +8961,14 @@ static int __init thinkpad_acpi_module_init(void) proc_dir = proc_mkdir(TPACPI_PROC_DIR, acpi_root_dir); if (!proc_dir) { - printk(TPACPI_ERR - "unable to create proc dir " TPACPI_PROC_DIR); + pr_err("unable to create proc dir " TPACPI_PROC_DIR "\n"); thinkpad_acpi_module_exit(); return -ENODEV; } ret = platform_driver_register(&tpacpi_pdriver); if (ret) { - printk(TPACPI_ERR - "unable to register main platform driver\n"); + pr_err("unable to register main platform driver\n"); thinkpad_acpi_module_exit(); return ret; } @@ -9068,8 +8976,7 @@ static int __init thinkpad_acpi_module_init(void) ret = platform_driver_register(&tpacpi_hwmon_pdriver); if (ret) { - printk(TPACPI_ERR - "unable to register hwmon platform driver\n"); + pr_err("unable to register hwmon platform driver\n"); thinkpad_acpi_module_exit(); return ret; } @@ -9082,8 +8989,7 @@ static int __init thinkpad_acpi_module_init(void) &tpacpi_hwmon_pdriver.driver); } if (ret) { - printk(TPACPI_ERR - "unable to create sysfs driver attributes\n"); + pr_err("unable to create sysfs driver attributes\n"); thinkpad_acpi_module_exit(); return ret; } @@ -9096,7 +9002,7 @@ static int __init thinkpad_acpi_module_init(void) if (IS_ERR(tpacpi_pdev)) { ret = PTR_ERR(tpacpi_pdev); tpacpi_pdev = NULL; - printk(TPACPI_ERR "unable to register platform device\n"); 
+ pr_err("unable to register platform device\n"); thinkpad_acpi_module_exit(); return ret; } @@ -9106,16 +9012,14 @@ static int __init thinkpad_acpi_module_init(void) if (IS_ERR(tpacpi_sensors_pdev)) { ret = PTR_ERR(tpacpi_sensors_pdev); tpacpi_sensors_pdev = NULL; - printk(TPACPI_ERR - "unable to register hwmon platform device\n"); + pr_err("unable to register hwmon platform device\n"); thinkpad_acpi_module_exit(); return ret; } ret = device_create_file(&tpacpi_sensors_pdev->dev, &dev_attr_thinkpad_acpi_pdev_name); if (ret) { - printk(TPACPI_ERR - "unable to create sysfs hwmon device attributes\n"); + pr_err("unable to create sysfs hwmon device attributes\n"); thinkpad_acpi_module_exit(); return ret; } @@ -9124,14 +9028,14 @@ static int __init thinkpad_acpi_module_init(void) if (IS_ERR(tpacpi_hwmon)) { ret = PTR_ERR(tpacpi_hwmon); tpacpi_hwmon = NULL; - printk(TPACPI_ERR "unable to register hwmon device\n"); + pr_err("unable to register hwmon device\n"); thinkpad_acpi_module_exit(); return ret; } mutex_init(&tpacpi_inputdev_send_mutex); tpacpi_inputdev = input_allocate_device(); if (!tpacpi_inputdev) { - printk(TPACPI_ERR "unable to allocate input device\n"); + pr_err("unable to allocate input device\n"); thinkpad_acpi_module_exit(); return -ENOMEM; } else { @@ -9163,7 +9067,7 @@ static int __init thinkpad_acpi_module_init(void) ret = input_register_device(tpacpi_inputdev); if (ret < 0) { - printk(TPACPI_ERR "unable to register input device\n"); + pr_err("unable to register input device\n"); thinkpad_acpi_module_exit(); return ret; } else { diff --git a/drivers/platform/x86/topstar-laptop.c b/drivers/platform/x86/topstar-laptop.c index 1d07d6d09f2..4c20447ddbb 100644 --- a/drivers/platform/x86/topstar-laptop.c +++ b/drivers/platform/x86/topstar-laptop.c @@ -194,7 +194,7 @@ static int __init topstar_laptop_init(void) if (ret < 0) return ret; - printk(KERN_INFO "Topstar Laptop ACPI extras driver loaded\n"); + pr_info("ACPI extras driver loaded\n"); return 0; } diff 
--git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 63f42a22e10..cb009b2629e 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -35,6 +35,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #define TOSHIBA_ACPI_VERSION "0.19" #define PROC_INTERFACE_VERSION 1 @@ -60,11 +62,6 @@ MODULE_AUTHOR("John Belmonte"); MODULE_DESCRIPTION("Toshiba Laptop ACPI Extras Driver"); MODULE_LICENSE("GPL"); -#define MY_LOGPREFIX "toshiba_acpi: " -#define MY_ERR KERN_ERR MY_LOGPREFIX -#define MY_NOTICE KERN_NOTICE MY_LOGPREFIX -#define MY_INFO KERN_INFO MY_LOGPREFIX - /* Toshiba ACPI method paths */ #define METHOD_LCD_BRIGHTNESS "\\_SB_.PCI0.VGA_.LCD_._BCM" #define TOSH_INTERFACE_1 "\\_SB_.VALD" @@ -301,7 +298,7 @@ static int toshiba_illumination_available(void) in[0] = 0xf100; status = hci_raw(in, out); if (ACPI_FAILURE(status)) { - printk(MY_INFO "Illumination device not available\n"); + pr_info("Illumination device not available\n"); return 0; } in[0] = 0xf400; @@ -320,7 +317,7 @@ static void toshiba_illumination_set(struct led_classdev *cdev, in[0] = 0xf100; status = hci_raw(in, out); if (ACPI_FAILURE(status)) { - printk(MY_INFO "Illumination device not available\n"); + pr_info("Illumination device not available\n"); return; } @@ -331,7 +328,7 @@ static void toshiba_illumination_set(struct led_classdev *cdev, in[2] = 1; status = hci_raw(in, out); if (ACPI_FAILURE(status)) { - printk(MY_INFO "ACPI call for illumination failed.\n"); + pr_info("ACPI call for illumination failed\n"); return; } } else { @@ -341,7 +338,7 @@ static void toshiba_illumination_set(struct led_classdev *cdev, in[2] = 0; status = hci_raw(in, out); if (ACPI_FAILURE(status)) { - printk(MY_INFO "ACPI call for illumination failed.\n"); + pr_info("ACPI call for illumination failed.\n"); return; } } @@ -364,7 +361,7 @@ static enum led_brightness toshiba_illumination_get(struct led_classdev *cdev) in[0] = 0xf100; status = hci_raw(in, 
out); if (ACPI_FAILURE(status)) { - printk(MY_INFO "Illumination device not available\n"); + pr_info("Illumination device not available\n"); return LED_OFF; } @@ -373,7 +370,7 @@ static enum led_brightness toshiba_illumination_get(struct led_classdev *cdev) in[1] = 0x14e; status = hci_raw(in, out); if (ACPI_FAILURE(status)) { - printk(MY_INFO "ACPI call for illumination failed.\n"); + pr_info("ACPI call for illumination failed.\n"); return LED_OFF; } @@ -517,7 +514,7 @@ static int lcd_proc_show(struct seq_file *m, void *v) seq_printf(m, "brightness_levels: %d\n", HCI_LCD_BRIGHTNESS_LEVELS); } else { - printk(MY_ERR "Error reading LCD brightness\n"); + pr_err("Error reading LCD brightness\n"); } return 0; @@ -592,7 +589,7 @@ static int video_proc_show(struct seq_file *m, void *v) seq_printf(m, "crt_out: %d\n", is_crt); seq_printf(m, "tv_out: %d\n", is_tv); } else { - printk(MY_ERR "Error reading video out status\n"); + pr_err("Error reading video out status\n"); } return 0; @@ -686,7 +683,7 @@ static int fan_proc_show(struct seq_file *m, void *v) seq_printf(m, "running: %d\n", (value > 0)); seq_printf(m, "force_on: %d\n", force_fan); } else { - printk(MY_ERR "Error reading fan status\n"); + pr_err("Error reading fan status\n"); } return 0; @@ -750,9 +747,9 @@ static int keys_proc_show(struct seq_file *m, void *v) * some machines where system events sporadically * become disabled. 
*/ hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result); - printk(MY_NOTICE "Re-enabled hotkeys\n"); + pr_notice("Re-enabled hotkeys\n"); } else { - printk(MY_ERR "Error reading hotkey status\n"); + pr_err("Error reading hotkey status\n"); goto end; } } @@ -863,7 +860,7 @@ static void toshiba_acpi_notify(acpi_handle handle, u32 event, void *context) if (!sparse_keymap_report_event(toshiba_acpi.hotkey_dev, value, 1, true)) { - printk(MY_INFO "Unknown key %x\n", + pr_info("Unknown key %x\n", value); } } else if (hci_result == HCI_NOT_SUPPORTED) { @@ -871,7 +868,7 @@ static void toshiba_acpi_notify(acpi_handle handle, u32 event, void *context) * some machines where system events sporadically * become disabled. */ hci_write1(HCI_SYSTEM_EVENT, 1, &hci_result); - printk(MY_NOTICE "Re-enabled hotkeys\n"); + pr_notice("Re-enabled hotkeys\n"); } } while (hci_result != HCI_EMPTY); } @@ -883,13 +880,13 @@ static int __init toshiba_acpi_setup_keyboard(char *device) status = acpi_get_handle(NULL, device, &toshiba_acpi.handle); if (ACPI_FAILURE(status)) { - printk(MY_INFO "Unable to get notification device\n"); + pr_info("Unable to get notification device\n"); return -ENODEV; } toshiba_acpi.hotkey_dev = input_allocate_device(); if (!toshiba_acpi.hotkey_dev) { - printk(MY_INFO "Unable to register input device\n"); + pr_info("Unable to register input device\n"); return -ENOMEM; } @@ -905,21 +902,21 @@ static int __init toshiba_acpi_setup_keyboard(char *device) status = acpi_install_notify_handler(toshiba_acpi.handle, ACPI_DEVICE_NOTIFY, toshiba_acpi_notify, NULL); if (ACPI_FAILURE(status)) { - printk(MY_INFO "Unable to install hotkey notification\n"); + pr_info("Unable to install hotkey notification\n"); error = -ENODEV; goto err_free_keymap; } status = acpi_evaluate_object(toshiba_acpi.handle, "ENAB", NULL, NULL); if (ACPI_FAILURE(status)) { - printk(MY_INFO "Unable to enable hotkeys\n"); + pr_info("Unable to enable hotkeys\n"); error = -ENODEV; goto err_remove_notify; } error = 
input_register_device(toshiba_acpi.hotkey_dev); if (error) { - printk(MY_INFO "Unable to register input device\n"); + pr_info("Unable to register input device\n"); goto err_remove_notify; } @@ -980,17 +977,17 @@ static int __init toshiba_acpi_init(void) if (is_valid_acpi_path(TOSH_INTERFACE_1 GHCI_METHOD)) { method_hci = TOSH_INTERFACE_1 GHCI_METHOD; if (toshiba_acpi_setup_keyboard(TOSH_INTERFACE_1)) - printk(MY_INFO "Unable to activate hotkeys\n"); + pr_info("Unable to activate hotkeys\n"); } else if (is_valid_acpi_path(TOSH_INTERFACE_2 GHCI_METHOD)) { method_hci = TOSH_INTERFACE_2 GHCI_METHOD; if (toshiba_acpi_setup_keyboard(TOSH_INTERFACE_2)) - printk(MY_INFO "Unable to activate hotkeys\n"); + pr_info("Unable to activate hotkeys\n"); } else return -ENODEV; - printk(MY_INFO "Toshiba Laptop ACPI Extras version %s\n", + pr_info("Toshiba Laptop ACPI Extras version %s\n", TOSHIBA_ACPI_VERSION); - printk(MY_INFO " HCI method: %s\n", method_hci); + pr_info(" HCI method: %s\n", method_hci); mutex_init(&toshiba_acpi.mutex); @@ -998,7 +995,7 @@ static int __init toshiba_acpi_init(void) -1, NULL, 0); if (IS_ERR(toshiba_acpi.p_dev)) { ret = PTR_ERR(toshiba_acpi.p_dev); - printk(MY_ERR "unable to register platform device\n"); + pr_err("unable to register platform device\n"); toshiba_acpi.p_dev = NULL; toshiba_acpi_exit(); return ret; @@ -1028,7 +1025,7 @@ static int __init toshiba_acpi_init(void) if (IS_ERR(toshiba_backlight_device)) { ret = PTR_ERR(toshiba_backlight_device); - printk(KERN_ERR "Could not register toshiba backlight device\n"); + pr_err("Could not register toshiba backlight device\n"); toshiba_backlight_device = NULL; toshiba_acpi_exit(); return ret; @@ -1042,14 +1039,14 @@ static int __init toshiba_acpi_init(void) &toshiba_rfk_ops, &toshiba_acpi); if (!toshiba_acpi.bt_rfk) { - printk(MY_ERR "unable to allocate rfkill device\n"); + pr_err("unable to allocate rfkill device\n"); toshiba_acpi_exit(); return -ENOMEM; } ret = rfkill_register(toshiba_acpi.bt_rfk); 
if (ret) { - printk(MY_ERR "unable to register rfkill device\n"); + pr_err("unable to register rfkill device\n"); rfkill_destroy(toshiba_acpi.bt_rfk); toshiba_acpi_exit(); return ret; diff --git a/drivers/platform/x86/toshiba_bluetooth.c b/drivers/platform/x86/toshiba_bluetooth.c index 94406861191..5fb7186694d 100644 --- a/drivers/platform/x86/toshiba_bluetooth.c +++ b/drivers/platform/x86/toshiba_bluetooth.c @@ -17,6 +17,8 @@ * delivered. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -70,14 +72,13 @@ static int toshiba_bluetooth_enable(acpi_handle handle) if (!(result & 0x01)) return 0; - printk(KERN_INFO "toshiba_bluetooth: Re-enabling Toshiba Bluetooth\n"); + pr_info("Re-enabling Toshiba Bluetooth\n"); res1 = acpi_evaluate_object(handle, "AUSB", NULL, NULL); res2 = acpi_evaluate_object(handle, "BTPO", NULL, NULL); if (!ACPI_FAILURE(res1) || !ACPI_FAILURE(res2)) return 0; - printk(KERN_WARNING "toshiba_bluetooth: Failed to re-enable " - "Toshiba Bluetooth\n"); + pr_warn("Failed to re-enable Toshiba Bluetooth\n"); return -ENODEV; } @@ -107,8 +108,8 @@ static int toshiba_bt_rfkill_add(struct acpi_device *device) &bt_present); if (!ACPI_FAILURE(status) && bt_present) { - printk(KERN_INFO "Detected Toshiba ACPI Bluetooth device - " - "installing RFKill handler\n"); + pr_info("Detected Toshiba ACPI Bluetooth device - " + "installing RFKill handler\n"); result = toshiba_bluetooth_enable(device->handle); } diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index 05cc79672a8..f23d5a84e7b 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -486,16 +486,16 @@ static void wmi_dump_wdg(const struct guid_block *g) pr_info("\tnotify_id: %02X\n", g->notify_id); pr_info("\treserved: %02X\n", g->reserved); pr_info("\tinstance_count: %d\n", g->instance_count); - pr_info("\tflags: %#x ", g->flags); + pr_info("\tflags: %#x", g->flags); if (g->flags) { if 
(g->flags & ACPI_WMI_EXPENSIVE) - pr_cont("ACPI_WMI_EXPENSIVE "); + pr_cont(" ACPI_WMI_EXPENSIVE"); if (g->flags & ACPI_WMI_METHOD) - pr_cont("ACPI_WMI_METHOD "); + pr_cont(" ACPI_WMI_METHOD"); if (g->flags & ACPI_WMI_STRING) - pr_cont("ACPI_WMI_STRING "); + pr_cont(" ACPI_WMI_STRING"); if (g->flags & ACPI_WMI_EVENT) - pr_cont("ACPI_WMI_EVENT "); + pr_cont(" ACPI_WMI_EVENT"); } pr_cont("\n"); diff --git a/drivers/platform/x86/xo15-ebook.c b/drivers/platform/x86/xo15-ebook.c index c1372ed9d2e..fad153dc035 100644 --- a/drivers/platform/x86/xo15-ebook.c +++ b/drivers/platform/x86/xo15-ebook.c @@ -11,6 +11,8 @@ * your option) any later version. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/kernel.h> #include <linux/module.h> #include <linux/init.h> @@ -20,7 +22,6 @@ #include <acpi/acpi_drivers.h> #define MODULE_NAME "xo15-ebook" -#define PREFIX MODULE_NAME ": " #define XO15_EBOOK_CLASS MODULE_NAME #define XO15_EBOOK_TYPE_UNKNOWN 0x00 @@ -105,7 +106,7 @@ static int ebook_switch_add(struct acpi_device *device) class = acpi_device_class(device); if (strcmp(hid, XO15_EBOOK_HID)) { - printk(KERN_ERR PREFIX "Unsupported hid [%s]\n", hid); + pr_err("Unsupported hid [%s]\n", hid); error = -ENODEV; goto err_free_input; } diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index f46855cd853..ad747dc337d 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -381,11 +381,6 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf, return err; } -/** - * proc_scsi_show - show contents of /proc/scsi/scsi (attached devices) - * @s: output goes here - * @p: not used - */ static int always_match(struct device *dev, void *data) { return 1; diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index fbd96b29530..de35c3ad8a6 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -80,6 +80,15 @@ config SPI_BFIN help This is the SPI controller master driver for Blackfin 5xx processor. 
+config SPI_BFIN_SPORT + tristate "SPI bus via Blackfin SPORT" + depends on BLACKFIN + help + Enable support for a SPI bus via the Blackfin SPORT peripheral. + + This driver can also be built as a module. If so, the module + will be called spi_bfin_sport. + config SPI_AU1550 tristate "Au1550/Au12x0 SPI Controller" depends on (SOC_AU1550 || SOC_AU1200) && EXPERIMENTAL diff --git a/drivers/spi/Makefile b/drivers/spi/Makefile index fd2fc5f6505..0f8c69b6b19 100644 --- a/drivers/spi/Makefile +++ b/drivers/spi/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_SPI_ALTERA) += spi_altera.o obj-$(CONFIG_SPI_ATMEL) += atmel_spi.o obj-$(CONFIG_SPI_ATH79) += ath79_spi.o obj-$(CONFIG_SPI_BFIN) += spi_bfin5xx.o +obj-$(CONFIG_SPI_BFIN_SPORT) += spi_bfin_sport.o obj-$(CONFIG_SPI_BITBANG) += spi_bitbang.o obj-$(CONFIG_SPI_AU1550) += au1550_spi.o obj-$(CONFIG_SPI_BUTTERFLY) += spi_butterfly.o diff --git a/drivers/spi/spi_bfin_sport.c b/drivers/spi/spi_bfin_sport.c new file mode 100644 index 00000000000..e557ff617b1 --- /dev/null +++ b/drivers/spi/spi_bfin_sport.c @@ -0,0 +1,952 @@ +/* + * SPI bus via the Blackfin SPORT peripheral + * + * Enter bugs at http://blackfin.uclinux.org/ + * + * Copyright 2009-2011 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. 
+ */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/gpio.h> +#include <linux/io.h> +#include <linux/ioport.h> +#include <linux/irq.h> +#include <linux/errno.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/spi/spi.h> +#include <linux/workqueue.h> + +#include <asm/portmux.h> +#include <asm/bfin5xx_spi.h> +#include <asm/blackfin.h> +#include <asm/bfin_sport.h> +#include <asm/cacheflush.h> + +#define DRV_NAME "bfin-sport-spi" +#define DRV_DESC "SPI bus via the Blackfin SPORT" + +MODULE_AUTHOR("Cliff Cai"); +MODULE_DESCRIPTION(DRV_DESC); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:bfin-sport-spi"); + +enum bfin_sport_spi_state { + START_STATE, + RUNNING_STATE, + DONE_STATE, + ERROR_STATE, +}; + +struct bfin_sport_spi_master_data; + +struct bfin_sport_transfer_ops { + void (*write) (struct bfin_sport_spi_master_data *); + void (*read) (struct bfin_sport_spi_master_data *); + void (*duplex) (struct bfin_sport_spi_master_data *); +}; + +struct bfin_sport_spi_master_data { + /* Driver model hookup */ + struct device *dev; + + /* SPI framework hookup */ + struct spi_master *master; + + /* Regs base of SPI controller */ + struct sport_register __iomem *regs; + int err_irq; + + /* Pin request list */ + u16 *pin_req; + + /* Driver message queue */ + struct workqueue_struct *workqueue; + struct work_struct pump_messages; + spinlock_t lock; + struct list_head queue; + int busy; + bool run; + + /* Message Transfer pump */ + struct tasklet_struct pump_transfers; + + /* Current message transfer state info */ + enum bfin_sport_spi_state state; + struct spi_message *cur_msg; + struct spi_transfer *cur_transfer; + struct bfin_sport_spi_slave_data *cur_chip; + union { + void *tx; + u8 *tx8; + u16 *tx16; + }; + void *tx_end; + union { + void *rx; + u8 *rx8; + u16 *rx16; + }; + void *rx_end; + + int cs_change; + struct bfin_sport_transfer_ops *ops; +}; + +struct 
bfin_sport_spi_slave_data { + u16 ctl_reg; + u16 baud; + u16 cs_chg_udelay; /* Some devices require > 255usec delay */ + u32 cs_gpio; + u16 idle_tx_val; + struct bfin_sport_transfer_ops *ops; +}; + +static void +bfin_sport_spi_enable(struct bfin_sport_spi_master_data *drv_data) +{ + bfin_write_or(&drv_data->regs->tcr1, TSPEN); + bfin_write_or(&drv_data->regs->rcr1, TSPEN); + SSYNC(); +} + +static void +bfin_sport_spi_disable(struct bfin_sport_spi_master_data *drv_data) +{ + bfin_write_and(&drv_data->regs->tcr1, ~TSPEN); + bfin_write_and(&drv_data->regs->rcr1, ~TSPEN); + SSYNC(); +} + +/* Caculate the SPI_BAUD register value based on input HZ */ +static u16 +bfin_sport_hz_to_spi_baud(u32 speed_hz) +{ + u_long clk, sclk = get_sclk(); + int div = (sclk / (2 * speed_hz)) - 1; + + if (div < 0) + div = 0; + + clk = sclk / (2 * (div + 1)); + + if (clk > speed_hz) + div++; + + return div; +} + +/* Chip select operation functions for cs_change flag */ +static void +bfin_sport_spi_cs_active(struct bfin_sport_spi_slave_data *chip) +{ + gpio_direction_output(chip->cs_gpio, 0); +} + +static void +bfin_sport_spi_cs_deactive(struct bfin_sport_spi_slave_data *chip) +{ + gpio_direction_output(chip->cs_gpio, 1); + /* Move delay here for consistency */ + if (chip->cs_chg_udelay) + udelay(chip->cs_chg_udelay); +} + +static void +bfin_sport_spi_stat_poll_complete(struct bfin_sport_spi_master_data *drv_data) +{ + unsigned long timeout = jiffies + HZ; + while (!(bfin_read(&drv_data->regs->stat) & RXNE)) { + if (!time_before(jiffies, timeout)) + break; + } +} + +static void +bfin_sport_spi_u8_writer(struct bfin_sport_spi_master_data *drv_data) +{ + u16 dummy; + + while (drv_data->tx < drv_data->tx_end) { + bfin_write(&drv_data->regs->tx16, *drv_data->tx8++); + bfin_sport_spi_stat_poll_complete(drv_data); + dummy = bfin_read(&drv_data->regs->rx16); + } +} + +static void +bfin_sport_spi_u8_reader(struct bfin_sport_spi_master_data *drv_data) +{ + u16 tx_val = drv_data->cur_chip->idle_tx_val; 
+ + while (drv_data->rx < drv_data->rx_end) { + bfin_write(&drv_data->regs->tx16, tx_val); + bfin_sport_spi_stat_poll_complete(drv_data); + *drv_data->rx8++ = bfin_read(&drv_data->regs->rx16); + } +} + +static void +bfin_sport_spi_u8_duplex(struct bfin_sport_spi_master_data *drv_data) +{ + while (drv_data->rx < drv_data->rx_end) { + bfin_write(&drv_data->regs->tx16, *drv_data->tx8++); + bfin_sport_spi_stat_poll_complete(drv_data); + *drv_data->rx8++ = bfin_read(&drv_data->regs->rx16); + } +} + +static struct bfin_sport_transfer_ops bfin_sport_transfer_ops_u8 = { + .write = bfin_sport_spi_u8_writer, + .read = bfin_sport_spi_u8_reader, + .duplex = bfin_sport_spi_u8_duplex, +}; + +static void +bfin_sport_spi_u16_writer(struct bfin_sport_spi_master_data *drv_data) +{ + u16 dummy; + + while (drv_data->tx < drv_data->tx_end) { + bfin_write(&drv_data->regs->tx16, *drv_data->tx16++); + bfin_sport_spi_stat_poll_complete(drv_data); + dummy = bfin_read(&drv_data->regs->rx16); + } +} + +static void +bfin_sport_spi_u16_reader(struct bfin_sport_spi_master_data *drv_data) +{ + u16 tx_val = drv_data->cur_chip->idle_tx_val; + + while (drv_data->rx < drv_data->rx_end) { + bfin_write(&drv_data->regs->tx16, tx_val); + bfin_sport_spi_stat_poll_complete(drv_data); + *drv_data->rx16++ = bfin_read(&drv_data->regs->rx16); + } +} + +static void +bfin_sport_spi_u16_duplex(struct bfin_sport_spi_master_data *drv_data) +{ + while (drv_data->rx < drv_data->rx_end) { + bfin_write(&drv_data->regs->tx16, *drv_data->tx16++); + bfin_sport_spi_stat_poll_complete(drv_data); + *drv_data->rx16++ = bfin_read(&drv_data->regs->rx16); + } +} + +static struct bfin_sport_transfer_ops bfin_sport_transfer_ops_u16 = { + .write = bfin_sport_spi_u16_writer, + .read = bfin_sport_spi_u16_reader, + .duplex = bfin_sport_spi_u16_duplex, +}; + +/* stop controller and re-config current chip */ +static void +bfin_sport_spi_restore_state(struct bfin_sport_spi_master_data *drv_data) +{ + struct bfin_sport_spi_slave_data 
*chip = drv_data->cur_chip; + unsigned int bits = (drv_data->ops == &bfin_sport_transfer_ops_u8 ? 7 : 15); + + bfin_sport_spi_disable(drv_data); + dev_dbg(drv_data->dev, "restoring spi ctl state\n"); + + bfin_write(&drv_data->regs->tcr1, chip->ctl_reg); + bfin_write(&drv_data->regs->tcr2, bits); + bfin_write(&drv_data->regs->tclkdiv, chip->baud); + bfin_write(&drv_data->regs->tfsdiv, bits); + SSYNC(); + + bfin_write(&drv_data->regs->rcr1, chip->ctl_reg & ~(ITCLK | ITFS)); + bfin_write(&drv_data->regs->rcr2, bits); + SSYNC(); + + bfin_sport_spi_cs_active(chip); +} + +/* test if there is more transfer to be done */ +static enum bfin_sport_spi_state +bfin_sport_spi_next_transfer(struct bfin_sport_spi_master_data *drv_data) +{ + struct spi_message *msg = drv_data->cur_msg; + struct spi_transfer *trans = drv_data->cur_transfer; + + /* Move to next transfer */ + if (trans->transfer_list.next != &msg->transfers) { + drv_data->cur_transfer = + list_entry(trans->transfer_list.next, + struct spi_transfer, transfer_list); + return RUNNING_STATE; + } + + return DONE_STATE; +} + +/* + * caller already set message->status; + * dma and pio irqs are blocked give finished message back + */ +static void +bfin_sport_spi_giveback(struct bfin_sport_spi_master_data *drv_data) +{ + struct bfin_sport_spi_slave_data *chip = drv_data->cur_chip; + unsigned long flags; + struct spi_message *msg; + + spin_lock_irqsave(&drv_data->lock, flags); + msg = drv_data->cur_msg; + drv_data->state = START_STATE; + drv_data->cur_msg = NULL; + drv_data->cur_transfer = NULL; + drv_data->cur_chip = NULL; + queue_work(drv_data->workqueue, &drv_data->pump_messages); + spin_unlock_irqrestore(&drv_data->lock, flags); + + if (!drv_data->cs_change) + bfin_sport_spi_cs_deactive(chip); + + if (msg->complete) + msg->complete(msg->context); +} + +static irqreturn_t +sport_err_handler(int irq, void *dev_id) +{ + struct bfin_sport_spi_master_data *drv_data = dev_id; + u16 status; + + dev_dbg(drv_data->dev, "%s enter\n", 
__func__); + status = bfin_read(&drv_data->regs->stat) & (TOVF | TUVF | ROVF | RUVF); + + if (status) { + bfin_write(&drv_data->regs->stat, status); + SSYNC(); + + bfin_sport_spi_disable(drv_data); + dev_err(drv_data->dev, "status error:%s%s%s%s\n", + status & TOVF ? " TOVF" : "", + status & TUVF ? " TUVF" : "", + status & ROVF ? " ROVF" : "", + status & RUVF ? " RUVF" : ""); + } + + return IRQ_HANDLED; +} + +static void +bfin_sport_spi_pump_transfers(unsigned long data) +{ + struct bfin_sport_spi_master_data *drv_data = (void *)data; + struct spi_message *message = NULL; + struct spi_transfer *transfer = NULL; + struct spi_transfer *previous = NULL; + struct bfin_sport_spi_slave_data *chip = NULL; + unsigned int bits_per_word; + u32 tranf_success = 1; + u32 transfer_speed; + u8 full_duplex = 0; + + /* Get current state information */ + message = drv_data->cur_msg; + transfer = drv_data->cur_transfer; + chip = drv_data->cur_chip; + + if (transfer->speed_hz) + transfer_speed = bfin_sport_hz_to_spi_baud(transfer->speed_hz); + else + transfer_speed = chip->baud; + bfin_write(&drv_data->regs->tclkdiv, transfer_speed); + SSYNC(); + + /* + * if msg is error or done, report it back using complete() callback + */ + + /* Handle for abort */ + if (drv_data->state == ERROR_STATE) { + dev_dbg(drv_data->dev, "transfer: we've hit an error\n"); + message->status = -EIO; + bfin_sport_spi_giveback(drv_data); + return; + } + + /* Handle end of message */ + if (drv_data->state == DONE_STATE) { + dev_dbg(drv_data->dev, "transfer: all done!\n"); + message->status = 0; + bfin_sport_spi_giveback(drv_data); + return; + } + + /* Delay if requested at end of transfer */ + if (drv_data->state == RUNNING_STATE) { + dev_dbg(drv_data->dev, "transfer: still running ...\n"); + previous = list_entry(transfer->transfer_list.prev, + struct spi_transfer, transfer_list); + if (previous->delay_usecs) + udelay(previous->delay_usecs); + } + + if (transfer->len == 0) { + /* Move to next transfer of this 
msg */ + drv_data->state = bfin_sport_spi_next_transfer(drv_data); + /* Schedule next transfer tasklet */ + tasklet_schedule(&drv_data->pump_transfers); + } + + if (transfer->tx_buf != NULL) { + drv_data->tx = (void *)transfer->tx_buf; + drv_data->tx_end = drv_data->tx + transfer->len; + dev_dbg(drv_data->dev, "tx_buf is %p, tx_end is %p\n", + transfer->tx_buf, drv_data->tx_end); + } else + drv_data->tx = NULL; + + if (transfer->rx_buf != NULL) { + full_duplex = transfer->tx_buf != NULL; + drv_data->rx = transfer->rx_buf; + drv_data->rx_end = drv_data->rx + transfer->len; + dev_dbg(drv_data->dev, "rx_buf is %p, rx_end is %p\n", + transfer->rx_buf, drv_data->rx_end); + } else + drv_data->rx = NULL; + + drv_data->cs_change = transfer->cs_change; + + /* Bits per word setup */ + bits_per_word = transfer->bits_per_word ? : message->spi->bits_per_word; + if (bits_per_word == 8) + drv_data->ops = &bfin_sport_transfer_ops_u8; + else + drv_data->ops = &bfin_sport_transfer_ops_u16; + + drv_data->state = RUNNING_STATE; + + if (drv_data->cs_change) + bfin_sport_spi_cs_active(chip); + + dev_dbg(drv_data->dev, + "now pumping a transfer: width is %d, len is %d\n", + bits_per_word, transfer->len); + + /* PIO mode write then read */ + dev_dbg(drv_data->dev, "doing IO transfer\n"); + + bfin_sport_spi_enable(drv_data); + if (full_duplex) { + /* full duplex mode */ + BUG_ON((drv_data->tx_end - drv_data->tx) != + (drv_data->rx_end - drv_data->rx)); + drv_data->ops->duplex(drv_data); + + if (drv_data->tx != drv_data->tx_end) + tranf_success = 0; + } else if (drv_data->tx != NULL) { + /* write only half duplex */ + + drv_data->ops->write(drv_data); + + if (drv_data->tx != drv_data->tx_end) + tranf_success = 0; + } else if (drv_data->rx != NULL) { + /* read only half duplex */ + + drv_data->ops->read(drv_data); + if (drv_data->rx != drv_data->rx_end) + tranf_success = 0; + } + bfin_sport_spi_disable(drv_data); + + if (!tranf_success) { + dev_dbg(drv_data->dev, "IO write error!\n"); + 
drv_data->state = ERROR_STATE; + } else { + /* Update total byte transfered */ + message->actual_length += transfer->len; + /* Move to next transfer of this msg */ + drv_data->state = bfin_sport_spi_next_transfer(drv_data); + if (drv_data->cs_change) + bfin_sport_spi_cs_deactive(chip); + } + + /* Schedule next transfer tasklet */ + tasklet_schedule(&drv_data->pump_transfers); +} + +/* pop a msg from queue and kick off real transfer */ +static void +bfin_sport_spi_pump_messages(struct work_struct *work) +{ + struct bfin_sport_spi_master_data *drv_data; + unsigned long flags; + struct spi_message *next_msg; + + drv_data = container_of(work, struct bfin_sport_spi_master_data, pump_messages); + + /* Lock queue and check for queue work */ + spin_lock_irqsave(&drv_data->lock, flags); + if (list_empty(&drv_data->queue) || !drv_data->run) { + /* pumper kicked off but no work to do */ + drv_data->busy = 0; + spin_unlock_irqrestore(&drv_data->lock, flags); + return; + } + + /* Make sure we are not already running a message */ + if (drv_data->cur_msg) { + spin_unlock_irqrestore(&drv_data->lock, flags); + return; + } + + /* Extract head of queue */ + next_msg = list_entry(drv_data->queue.next, + struct spi_message, queue); + + drv_data->cur_msg = next_msg; + + /* Setup the SSP using the per chip configuration */ + drv_data->cur_chip = spi_get_ctldata(drv_data->cur_msg->spi); + + list_del_init(&drv_data->cur_msg->queue); + + /* Initialize message state */ + drv_data->cur_msg->state = START_STATE; + drv_data->cur_transfer = list_entry(drv_data->cur_msg->transfers.next, + struct spi_transfer, transfer_list); + bfin_sport_spi_restore_state(drv_data); + dev_dbg(drv_data->dev, "got a message to pump, " + "state is set to: baud %d, cs_gpio %i, ctl 0x%x\n", + drv_data->cur_chip->baud, drv_data->cur_chip->cs_gpio, + drv_data->cur_chip->ctl_reg); + + dev_dbg(drv_data->dev, + "the first transfer len is %d\n", + drv_data->cur_transfer->len); + + /* Mark as busy and launch transfers */ + 
tasklet_schedule(&drv_data->pump_transfers); + + drv_data->busy = 1; + spin_unlock_irqrestore(&drv_data->lock, flags); +} + +/* + * got a msg to transfer, queue it in drv_data->queue. + * And kick off message pumper + */ +static int +bfin_sport_spi_transfer(struct spi_device *spi, struct spi_message *msg) +{ + struct bfin_sport_spi_master_data *drv_data = spi_master_get_devdata(spi->master); + unsigned long flags; + + spin_lock_irqsave(&drv_data->lock, flags); + + if (!drv_data->run) { + spin_unlock_irqrestore(&drv_data->lock, flags); + return -ESHUTDOWN; + } + + msg->actual_length = 0; + msg->status = -EINPROGRESS; + msg->state = START_STATE; + + dev_dbg(&spi->dev, "adding an msg in transfer()\n"); + list_add_tail(&msg->queue, &drv_data->queue); + + if (drv_data->run && !drv_data->busy) + queue_work(drv_data->workqueue, &drv_data->pump_messages); + + spin_unlock_irqrestore(&drv_data->lock, flags); + + return 0; +} + +/* Called every time common spi devices change state */ +static int +bfin_sport_spi_setup(struct spi_device *spi) +{ + struct bfin_sport_spi_slave_data *chip, *first = NULL; + int ret; + + /* Only alloc (or use chip_info) on first setup */ + chip = spi_get_ctldata(spi); + if (chip == NULL) { + struct bfin5xx_spi_chip *chip_info; + + chip = first = kzalloc(sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + /* platform chip_info isn't required */ + chip_info = spi->controller_data; + if (chip_info) { + /* + * DITFS and TDTYPE are only thing we don't set, but + * they probably shouldn't be changed by people. 
+ */ + if (chip_info->ctl_reg || chip_info->enable_dma) { + ret = -EINVAL; + dev_err(&spi->dev, "don't set ctl_reg/enable_dma fields"); + goto error; + } + chip->cs_chg_udelay = chip_info->cs_chg_udelay; + chip->idle_tx_val = chip_info->idle_tx_val; + spi->bits_per_word = chip_info->bits_per_word; + } + } + + if (spi->bits_per_word != 8 && spi->bits_per_word != 16) { + ret = -EINVAL; + goto error; + } + + /* translate common spi framework into our register + * following configure contents are same for tx and rx. + */ + + if (spi->mode & SPI_CPHA) + chip->ctl_reg &= ~TCKFE; + else + chip->ctl_reg |= TCKFE; + + if (spi->mode & SPI_LSB_FIRST) + chip->ctl_reg |= TLSBIT; + else + chip->ctl_reg &= ~TLSBIT; + + /* Sport in master mode */ + chip->ctl_reg |= ITCLK | ITFS | TFSR | LATFS | LTFS; + + chip->baud = bfin_sport_hz_to_spi_baud(spi->max_speed_hz); + + chip->cs_gpio = spi->chip_select; + ret = gpio_request(chip->cs_gpio, spi->modalias); + if (ret) + goto error; + + dev_dbg(&spi->dev, "setup spi chip %s, width is %d\n", + spi->modalias, spi->bits_per_word); + dev_dbg(&spi->dev, "ctl_reg is 0x%x, GPIO is %i\n", + chip->ctl_reg, spi->chip_select); + + spi_set_ctldata(spi, chip); + + bfin_sport_spi_cs_deactive(chip); + + return ret; + + error: + kfree(first); + return ret; +} + +/* + * callback for spi framework. 
+ * clean driver specific data + */ +static void +bfin_sport_spi_cleanup(struct spi_device *spi) +{ + struct bfin_sport_spi_slave_data *chip = spi_get_ctldata(spi); + + if (!chip) + return; + + gpio_free(chip->cs_gpio); + + kfree(chip); +} + +static int +bfin_sport_spi_init_queue(struct bfin_sport_spi_master_data *drv_data) +{ + INIT_LIST_HEAD(&drv_data->queue); + spin_lock_init(&drv_data->lock); + + drv_data->run = false; + drv_data->busy = 0; + + /* init transfer tasklet */ + tasklet_init(&drv_data->pump_transfers, + bfin_sport_spi_pump_transfers, (unsigned long)drv_data); + + /* init messages workqueue */ + INIT_WORK(&drv_data->pump_messages, bfin_sport_spi_pump_messages); + drv_data->workqueue = + create_singlethread_workqueue(dev_name(drv_data->master->dev.parent)); + if (drv_data->workqueue == NULL) + return -EBUSY; + + return 0; +} + +static int +bfin_sport_spi_start_queue(struct bfin_sport_spi_master_data *drv_data) +{ + unsigned long flags; + + spin_lock_irqsave(&drv_data->lock, flags); + + if (drv_data->run || drv_data->busy) { + spin_unlock_irqrestore(&drv_data->lock, flags); + return -EBUSY; + } + + drv_data->run = true; + drv_data->cur_msg = NULL; + drv_data->cur_transfer = NULL; + drv_data->cur_chip = NULL; + spin_unlock_irqrestore(&drv_data->lock, flags); + + queue_work(drv_data->workqueue, &drv_data->pump_messages); + + return 0; +} + +static inline int +bfin_sport_spi_stop_queue(struct bfin_sport_spi_master_data *drv_data) +{ + unsigned long flags; + unsigned limit = 500; + int status = 0; + + spin_lock_irqsave(&drv_data->lock, flags); + + /* + * This is a bit lame, but is optimized for the common execution path. + * A wait_queue on the drv_data->busy could be used, but then the common + * execution path (pump_messages) would be required to call wake_up or + * friends on every SPI message. 
Do this instead + */ + drv_data->run = false; + while (!list_empty(&drv_data->queue) && drv_data->busy && limit--) { + spin_unlock_irqrestore(&drv_data->lock, flags); + msleep(10); + spin_lock_irqsave(&drv_data->lock, flags); + } + + if (!list_empty(&drv_data->queue) || drv_data->busy) + status = -EBUSY; + + spin_unlock_irqrestore(&drv_data->lock, flags); + + return status; +} + +static inline int +bfin_sport_spi_destroy_queue(struct bfin_sport_spi_master_data *drv_data) +{ + int status; + + status = bfin_sport_spi_stop_queue(drv_data); + if (status) + return status; + + destroy_workqueue(drv_data->workqueue); + + return 0; +} + +static int __devinit +bfin_sport_spi_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct bfin5xx_spi_master *platform_info; + struct spi_master *master; + struct resource *res, *ires; + struct bfin_sport_spi_master_data *drv_data; + int status; + + platform_info = dev->platform_data; + + /* Allocate master with space for drv_data */ + master = spi_alloc_master(dev, sizeof(*master) + 16); + if (!master) { + dev_err(dev, "cannot alloc spi_master\n"); + return -ENOMEM; + } + + drv_data = spi_master_get_devdata(master); + drv_data->master = master; + drv_data->dev = dev; + drv_data->pin_req = platform_info->pin_req; + + master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_LSB_FIRST; + master->bus_num = pdev->id; + master->num_chipselect = platform_info->num_chipselect; + master->cleanup = bfin_sport_spi_cleanup; + master->setup = bfin_sport_spi_setup; + master->transfer = bfin_sport_spi_transfer; + + /* Find and map our resources */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (res == NULL) { + dev_err(dev, "cannot get IORESOURCE_MEM\n"); + status = -ENOENT; + goto out_error_get_res; + } + + drv_data->regs = ioremap(res->start, resource_size(res)); + if (drv_data->regs == NULL) { + dev_err(dev, "cannot map registers\n"); + status = -ENXIO; + goto out_error_ioremap; + } + + ires = 
platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!ires) { + dev_err(dev, "cannot get IORESOURCE_IRQ\n"); + status = -ENODEV; + goto out_error_get_ires; + } + drv_data->err_irq = ires->start; + + /* Initial and start queue */ + status = bfin_sport_spi_init_queue(drv_data); + if (status) { + dev_err(dev, "problem initializing queue\n"); + goto out_error_queue_alloc; + } + + status = bfin_sport_spi_start_queue(drv_data); + if (status) { + dev_err(dev, "problem starting queue\n"); + goto out_error_queue_alloc; + } + + status = request_irq(drv_data->err_irq, sport_err_handler, + 0, "sport_spi_err", drv_data); + if (status) { + dev_err(dev, "unable to request sport err irq\n"); + goto out_error_irq; + } + + status = peripheral_request_list(drv_data->pin_req, DRV_NAME); + if (status) { + dev_err(dev, "requesting peripherals failed\n"); + goto out_error_peripheral; + } + + /* Register with the SPI framework */ + platform_set_drvdata(pdev, drv_data); + status = spi_register_master(master); + if (status) { + dev_err(dev, "problem registering spi master\n"); + goto out_error_master; + } + + dev_info(dev, "%s, regs_base@%p\n", DRV_DESC, drv_data->regs); + return 0; + + out_error_master: + peripheral_free_list(drv_data->pin_req); + out_error_peripheral: + free_irq(drv_data->err_irq, drv_data); + out_error_irq: + out_error_queue_alloc: + bfin_sport_spi_destroy_queue(drv_data); + out_error_get_ires: + iounmap(drv_data->regs); + out_error_ioremap: + out_error_get_res: + spi_master_put(master); + + return status; +} + +/* stop hardware and remove the driver */ +static int __devexit +bfin_sport_spi_remove(struct platform_device *pdev) +{ + struct bfin_sport_spi_master_data *drv_data = platform_get_drvdata(pdev); + int status = 0; + + if (!drv_data) + return 0; + + /* Remove the queue */ + status = bfin_sport_spi_destroy_queue(drv_data); + if (status) + return status; + + /* Disable the SSP at the peripheral and SOC level */ + bfin_sport_spi_disable(drv_data); + + /* Disconnect 
from the SPI framework */ + spi_unregister_master(drv_data->master); + + peripheral_free_list(drv_data->pin_req); + + /* Prevent double remove */ + platform_set_drvdata(pdev, NULL); + + return 0; +} + +#ifdef CONFIG_PM +static int +bfin_sport_spi_suspend(struct platform_device *pdev, pm_message_t state) +{ + struct bfin_sport_spi_master_data *drv_data = platform_get_drvdata(pdev); + int status; + + status = bfin_sport_spi_stop_queue(drv_data); + if (status) + return status; + + /* stop hardware */ + bfin_sport_spi_disable(drv_data); + + return status; +} + +static int +bfin_sport_spi_resume(struct platform_device *pdev) +{ + struct bfin_sport_spi_master_data *drv_data = platform_get_drvdata(pdev); + int status; + + /* Enable the SPI interface */ + bfin_sport_spi_enable(drv_data); + + /* Start the queue running */ + status = bfin_sport_spi_start_queue(drv_data); + if (status) + dev_err(drv_data->dev, "problem resuming queue\n"); + + return status; +} +#else +# define bfin_sport_spi_suspend NULL +# define bfin_sport_spi_resume NULL +#endif + +static struct platform_driver bfin_sport_spi_driver = { + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + }, + .probe = bfin_sport_spi_probe, + .remove = __devexit_p(bfin_sport_spi_remove), + .suspend = bfin_sport_spi_suspend, + .resume = bfin_sport_spi_resume, +}; + +static int __init bfin_sport_spi_init(void) +{ + return platform_driver_register(&bfin_sport_spi_driver); +} +module_init(bfin_sport_spi_init); + +static void __exit bfin_sport_spi_exit(void) +{ + platform_driver_unregister(&bfin_sport_spi_driver); +} +module_exit(bfin_sport_spi_exit); diff --git a/drivers/spi/tle62x0.c b/drivers/spi/tle62x0.c index a3938958147..32a40876532 100644 --- a/drivers/spi/tle62x0.c +++ b/drivers/spi/tle62x0.c @@ -283,7 +283,7 @@ static int __devinit tle62x0_probe(struct spi_device *spi) return 0; err_gpios: - for (; ptr > 0; ptr--) + while (--ptr >= 0) device_remove_file(&spi->dev, gpio_attrs[ptr]); device_remove_file(&spi->dev, 
&dev_attr_status_show); @@ -301,6 +301,7 @@ static int __devexit tle62x0_remove(struct spi_device *spi) for (ptr = 0; ptr < st->nr_gpio; ptr++) device_remove_file(&spi->dev, gpio_attrs[ptr]); + device_remove_file(&spi->dev, &dev_attr_status_show); kfree(st); return 0; } diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index fc6f2a5bde0..0b1c82ad680 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -499,7 +499,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) dev_set_drvdata(hwmon->device, hwmon); result = device_create_file(hwmon->device, &dev_attr_name); if (result) - goto unregister_hwmon_device; + goto free_mem; register_sys_interface: tz->hwmon = hwmon; @@ -513,7 +513,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) sysfs_attr_init(&tz->temp_input.attr.attr); result = device_create_file(hwmon->device, &tz->temp_input.attr); if (result) - goto unregister_hwmon_device; + goto unregister_name; if (tz->ops->get_crit_temp) { unsigned long temperature; @@ -527,7 +527,7 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) result = device_create_file(hwmon->device, &tz->temp_crit.attr); if (result) - goto unregister_hwmon_device; + goto unregister_input; } } @@ -539,9 +539,9 @@ thermal_add_hwmon_sysfs(struct thermal_zone_device *tz) return 0; - unregister_hwmon_device: - device_remove_file(hwmon->device, &tz->temp_crit.attr); + unregister_input: device_remove_file(hwmon->device, &tz->temp_input.attr); + unregister_name: if (new_hwmon_device) { device_remove_file(hwmon->device, &dev_attr_name); hwmon_device_unregister(hwmon->device); diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index a4c42a75a3b..09e8c7d53af 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2128,8 +2128,8 @@ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) gsm->tty = NULL; } -static unsigned int gsmld_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char 
*fp, int count) +static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct gsm_mux *gsm = tty->disc_data; const unsigned char *dp; @@ -2162,8 +2162,6 @@ static unsigned int gsmld_receive_buf(struct tty_struct *tty, } /* FASYNC if needed ? */ /* If clogged call tty_throttle(tty); */ - - return count; } /** diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index cac666314ae..cea56033b34 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -188,8 +188,8 @@ static unsigned int n_hdlc_tty_poll(struct tty_struct *tty, struct file *filp, poll_table *wait); static int n_hdlc_tty_open(struct tty_struct *tty); static void n_hdlc_tty_close(struct tty_struct *tty); -static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, - const __u8 *cp, char *fp, int count); +static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *cp, + char *fp, int count); static void n_hdlc_tty_wakeup(struct tty_struct *tty); #define bset(p,b) ((p)[(b) >> 5] |= (1 << ((b) & 0x1f))) @@ -509,8 +509,8 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty) * Called by tty low level driver when receive data is available. Data is * interpreted as one HDLC frame. 
*/ -static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, - const __u8 *data, char *flags, int count) +static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, + char *flags, int count) { register struct n_hdlc *n_hdlc = tty2n_hdlc (tty); register struct n_hdlc_buf *buf; @@ -521,20 +521,20 @@ static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, /* This can happen if stuff comes in on the backup tty */ if (!n_hdlc || tty != n_hdlc->tty) - return -ENODEV; + return; /* verify line is using HDLC discipline */ if (n_hdlc->magic != HDLC_MAGIC) { printk("%s(%d) line not using HDLC discipline\n", __FILE__,__LINE__); - return -EINVAL; + return; } if ( count>maxframe ) { if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d) rx count>maxframesize, data discarded\n", __FILE__,__LINE__); - return -EINVAL; + return; } /* get a free HDLC buffer */ @@ -550,7 +550,7 @@ static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, if (debuglevel >= DEBUG_LEVEL_INFO) printk("%s(%d) no more rx buffers, data discarded\n", __FILE__,__LINE__); - return -EINVAL; + return; } /* copy received data to HDLC buffer */ @@ -565,8 +565,6 @@ static unsigned int n_hdlc_tty_receive(struct tty_struct *tty, if (n_hdlc->tty->fasync != NULL) kill_fasync (&n_hdlc->tty->fasync, SIGIO, POLL_IN); - return count; - } /* end of n_hdlc_tty_receive() */ /** diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index a4bc39c21a4..5c6c31459a2 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -139,8 +139,8 @@ static int r3964_ioctl(struct tty_struct *tty, struct file *file, static void r3964_set_termios(struct tty_struct *tty, struct ktermios *old); static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, struct poll_table_struct *wait); -static unsigned int r3964_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count); +static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int 
count); static struct tty_ldisc_ops tty_ldisc_N_R3964 = { .owner = THIS_MODULE, @@ -1239,8 +1239,8 @@ static unsigned int r3964_poll(struct tty_struct *tty, struct file *file, return result; } -static unsigned int r3964_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void r3964_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { struct r3964_info *pInfo = tty->disc_data; const unsigned char *p; @@ -1257,8 +1257,6 @@ static unsigned int r3964_receive_buf(struct tty_struct *tty, } } - - return count; } MODULE_LICENSE("GPL"); diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 95d0a9c2dd1..0ad32888091 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -81,6 +81,38 @@ static inline int tty_put_user(struct tty_struct *tty, unsigned char x, return put_user(x, ptr); } +/** + * n_tty_set__room - receive space + * @tty: terminal + * + * Called by the driver to find out how much data it is + * permitted to feed to the line discipline without any being lost + * and thus to manage flow control. Not serialized. Answers for the + * "instant". + */ + +static void n_tty_set_room(struct tty_struct *tty) +{ + /* tty->read_cnt is not read locked ? */ + int left = N_TTY_BUF_SIZE - tty->read_cnt - 1; + int old_left; + + /* + * If we are doing input canonicalization, and there are no + * pending newlines, let characters through without limit, so + * that erase characters will be handled. Other excess + * characters will be beeped. + */ + if (left <= 0) + left = tty->icanon && !tty->canon_data; + old_left = tty->receive_room; + tty->receive_room = left; + + /* Did this open up the receive buffer? 
We may need to flip */ + if (left && !old_left) + schedule_work(&tty->buf.work); +} + static void put_tty_queue_nolock(unsigned char c, struct tty_struct *tty) { if (tty->read_cnt < N_TTY_BUF_SIZE) { @@ -152,6 +184,7 @@ static void reset_buffer_flags(struct tty_struct *tty) tty->canon_head = tty->canon_data = tty->erasing = 0; memset(&tty->read_flags, 0, sizeof tty->read_flags); + n_tty_set_room(tty); check_unthrottle(tty); } @@ -1327,19 +1360,17 @@ static void n_tty_write_wakeup(struct tty_struct *tty) * calls one at a time and in order (or using flush_to_ldisc) */ -static unsigned int n_tty_receive_buf(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void n_tty_receive_buf(struct tty_struct *tty, const unsigned char *cp, + char *fp, int count) { const unsigned char *p; char *f, flags = TTY_NORMAL; int i; char buf[64]; unsigned long cpuflags; - int left; - int ret = 0; if (!tty->read_buf) - return 0; + return; if (tty->real_raw) { spin_lock_irqsave(&tty->read_lock, cpuflags); @@ -1349,7 +1380,6 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, memcpy(tty->read_buf + tty->read_head, cp, i); tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1); tty->read_cnt += i; - ret += i; cp += i; count -= i; @@ -1359,10 +1389,8 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, memcpy(tty->read_buf + tty->read_head, cp, i); tty->read_head = (tty->read_head + i) & (N_TTY_BUF_SIZE-1); tty->read_cnt += i; - ret += i; spin_unlock_irqrestore(&tty->read_lock, cpuflags); } else { - ret = count; for (i = count, p = cp, f = fp; i; i--, p++) { if (f) flags = *f++; @@ -1390,6 +1418,8 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, tty->ops->flush_chars(tty); } + n_tty_set_room(tty); + if ((!tty->icanon && (tty->read_cnt >= tty->minimum_to_wake)) || L_EXTPROC(tty)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); @@ -1402,12 +1432,8 @@ static unsigned int n_tty_receive_buf(struct tty_struct *tty, * mode. 
We don't want to throttle the driver if we're in * canonical mode and don't have a newline yet! */ - left = N_TTY_BUF_SIZE - tty->read_cnt - 1; - - if (left < TTY_THRESHOLD_THROTTLE) + if (tty->receive_room < TTY_THRESHOLD_THROTTLE) tty_throttle(tty); - - return ret; } int is_ignored(int sig) @@ -1451,6 +1477,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) if (test_bit(TTY_HW_COOK_IN, &tty->flags)) { tty->raw = 1; tty->real_raw = 1; + n_tty_set_room(tty); return; } if (I_ISTRIP(tty) || I_IUCLC(tty) || I_IGNCR(tty) || @@ -1503,6 +1530,7 @@ static void n_tty_set_termios(struct tty_struct *tty, struct ktermios *old) else tty->real_raw = 0; } + n_tty_set_room(tty); /* The termios change make the tty ready for I/O */ wake_up_interruptible(&tty->write_wait); wake_up_interruptible(&tty->read_wait); @@ -1784,6 +1812,8 @@ do_it_again: retval = -ERESTARTSYS; break; } + /* FIXME: does n_tty_set_room need locking ? */ + n_tty_set_room(tty); timeout = schedule_timeout(timeout); continue; } @@ -1855,8 +1885,10 @@ do_it_again: * longer than TTY_THRESHOLD_UNTHROTTLE in canonical mode, * we won't get any more characters. 
*/ - if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) + if (n_tty_chars_in_buffer(tty) <= TTY_THRESHOLD_UNTHROTTLE) { + n_tty_set_room(tty); check_unthrottle(tty); + } if (b - buf >= minimum) break; @@ -1878,6 +1910,7 @@ do_it_again: } else if (test_and_clear_bit(TTY_PUSH, &tty->flags)) goto do_it_again; + n_tty_set_room(tty); return retval; } diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 46de2e075da..f1a7918d71a 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -416,7 +416,6 @@ static void flush_to_ldisc(struct work_struct *work) struct tty_buffer *head, *tail = tty->buf.tail; int seen_tail = 0; while ((head = tty->buf.head) != NULL) { - int copied; int count; char *char_buf; unsigned char *flag_buf; @@ -443,19 +442,17 @@ static void flush_to_ldisc(struct work_struct *work) line discipline as we want to empty the queue */ if (test_bit(TTY_FLUSHPENDING, &tty->flags)) break; + if (!tty->receive_room || seen_tail) + break; + if (count > tty->receive_room) + count = tty->receive_room; char_buf = head->char_buf_ptr + head->read; flag_buf = head->flag_buf_ptr + head->read; + head->read += count; spin_unlock_irqrestore(&tty->buf.lock, flags); - copied = disc->ops->receive_buf(tty, char_buf, + disc->ops->receive_buf(tty, char_buf, flag_buf, count); spin_lock_irqsave(&tty->buf.lock, flags); - - head->read += copied; - - if (copied == 0 || seen_tail) { - schedule_work(&tty->buf.work); - break; - } } clear_bit(TTY_FLUSHING, &tty->flags); } diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index 67b1d0d7c8a..fb864e7fcd1 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -332,7 +332,8 @@ int paste_selection(struct tty_struct *tty) continue; } count = sel_buffer_lth - pasted; - count = tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, + count = min(count, tty->receive_room); + tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted, NULL, count); pasted += count; } diff 
--git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index 660b80a75ca..1102ce65a3a 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -348,11 +348,50 @@ static int ehci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) return rc; } +static bool usb_is_intel_switchable_ehci(struct pci_dev *pdev) +{ + return pdev->class == PCI_CLASS_SERIAL_USB_EHCI && + pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == 0x1E26; +} + +static void ehci_enable_xhci_companion(void) +{ + struct pci_dev *companion = NULL; + + /* The xHCI and EHCI controllers are not on the same PCI slot */ + for_each_pci_dev(companion) { + if (!usb_is_intel_switchable_xhci(companion)) + continue; + usb_enable_xhci_ports(companion); + return; + } +} + static int ehci_pci_resume(struct usb_hcd *hcd, bool hibernated) { struct ehci_hcd *ehci = hcd_to_ehci(hcd); struct pci_dev *pdev = to_pci_dev(hcd->self.controller); + /* The BIOS on systems with the Intel Panther Point chipset may or may + * not support xHCI natively. That means that during system resume, it + * may switch the ports back to EHCI so that users can use their + * keyboard to select a kernel from GRUB after resume from hibernate. + * + * The BIOS is supposed to remember whether the OS had xHCI ports + * enabled before resume, and switch the ports back to xHCI when the + * BIOS/OS semaphore is written, but we all know we can't trust BIOS + * writers. + * + * Unconditionally switch the ports back to xHCI after a system resume. + * We can't tell whether the EHCI or xHCI controller will be resumed + * first, so we have to do the port switchover in both drivers. Writing + * a '1' to the port switchover registers should have no effect if the + * port was already switched over. 
+ */ + if (usb_is_intel_switchable_ehci(pdev)) + ehci_enable_xhci_companion(); + // maybe restore FLADJ if (time_before(jiffies, ehci->next_statechange)) diff --git a/drivers/usb/host/pci-quirks.c b/drivers/usb/host/pci-quirks.c index f16c59d5f48..fd930618c28 100644 --- a/drivers/usb/host/pci-quirks.c +++ b/drivers/usb/host/pci-quirks.c @@ -69,6 +69,9 @@ #define NB_PIF0_PWRDOWN_0 0x01100012 #define NB_PIF0_PWRDOWN_1 0x01100013 +#define USB_INTEL_XUSB2PR 0xD0 +#define USB_INTEL_USB3_PSSEN 0xD8 + static struct amd_chipset_info { struct pci_dev *nb_dev; struct pci_dev *smbus_dev; @@ -673,6 +676,64 @@ static int handshake(void __iomem *ptr, u32 mask, u32 done, return -ETIMEDOUT; } +bool usb_is_intel_switchable_xhci(struct pci_dev *pdev) +{ + return pdev->class == PCI_CLASS_SERIAL_USB_XHCI && + pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI; +} +EXPORT_SYMBOL_GPL(usb_is_intel_switchable_xhci); + +/* + * Intel's Panther Point chipset has two host controllers (EHCI and xHCI) that + * share some number of ports. These ports can be switched between either + * controller. Not all of the ports under the EHCI host controller may be + * switchable. + * + * The ports should be switched over to xHCI before PCI probes for any device + * start. This avoids active devices under EHCI being disconnected during the + * port switchover, which could cause loss of data on USB storage devices, or + * failed boot when the root file system is on a USB mass storage device and is + * enumerated under EHCI first. + * + * We write into the xHC's PCI configuration space in some Intel-specific + * registers to switch the ports over. The USB 3.0 terminations and the USB + * 2.0 data wires are switched separately. We want to enable the SuperSpeed + * terminations before switching the USB 2.0 wires over, so that USB 3.0 + * devices connect at SuperSpeed, rather than at USB 2.0 speeds. 
+ */ +void usb_enable_xhci_ports(struct pci_dev *xhci_pdev) +{ + u32 ports_available; + + ports_available = 0xffffffff; + /* Write USB3_PSSEN, the USB 3.0 Port SuperSpeed Enable + * Register, to turn on SuperSpeed terminations for all + * available ports. + */ + pci_write_config_dword(xhci_pdev, USB_INTEL_USB3_PSSEN, + cpu_to_le32(ports_available)); + + pci_read_config_dword(xhci_pdev, USB_INTEL_USB3_PSSEN, + &ports_available); + dev_dbg(&xhci_pdev->dev, "USB 3.0 ports that are now enabled " + "under xHCI: 0x%x\n", ports_available); + + ports_available = 0xffffffff; + /* Write XUSB2PR, the xHC USB 2.0 Port Routing Register, to + * switch the USB 2.0 power and data lines over to the xHCI + * host. + */ + pci_write_config_dword(xhci_pdev, USB_INTEL_XUSB2PR, + cpu_to_le32(ports_available)); + + pci_read_config_dword(xhci_pdev, USB_INTEL_XUSB2PR, + &ports_available); + dev_dbg(&xhci_pdev->dev, "USB 2.0 ports that are now switched over " + "to xHCI: 0x%x\n", ports_available); +} +EXPORT_SYMBOL_GPL(usb_enable_xhci_ports); + /** * PCI Quirks for xHCI. 
* @@ -732,6 +793,8 @@ static void __devinit quirk_usb_handoff_xhci(struct pci_dev *pdev) writel(XHCI_LEGACY_DISABLE_SMI, base + ext_cap_offset + XHCI_LEGACY_CONTROL_OFFSET); + if (usb_is_intel_switchable_xhci(pdev)) + usb_enable_xhci_ports(pdev); hc_init: op_reg_base = base + XHCI_HC_LENGTH(readl(base)); diff --git a/drivers/usb/host/pci-quirks.h b/drivers/usb/host/pci-quirks.h index 6ae9f78e993..b1002a8ef96 100644 --- a/drivers/usb/host/pci-quirks.h +++ b/drivers/usb/host/pci-quirks.h @@ -8,6 +8,8 @@ int usb_amd_find_chipset_info(void); void usb_amd_dev_put(void); void usb_amd_quirk_pll_disable(void); void usb_amd_quirk_pll_enable(void); +bool usb_is_intel_switchable_xhci(struct pci_dev *pdev); +void usb_enable_xhci_ports(struct pci_dev *xhci_pdev); #else static inline void usb_amd_quirk_pll_disable(void) {} static inline void usb_amd_quirk_pll_enable(void) {} diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index cbc4d491e62..c408e9f6a70 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -118,6 +118,12 @@ static int xhci_pci_setup(struct usb_hcd *hcd) /* AMD PLL quirk */ if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info()) xhci->quirks |= XHCI_AMD_PLL_FIX; + if (pdev->vendor == PCI_VENDOR_ID_INTEL && + pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) { + xhci->quirks |= XHCI_SPURIOUS_SUCCESS; + xhci->quirks |= XHCI_EP_LIMIT_QUIRK; + xhci->limit_active_eps = 64; + } /* Make sure the HC is halted. */ retval = xhci_halt(xhci); @@ -242,8 +248,28 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup) static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated) { struct xhci_hcd *xhci = hcd_to_xhci(hcd); + struct pci_dev *pdev = to_pci_dev(hcd->self.controller); int retval = 0; + /* The BIOS on systems with the Intel Panther Point chipset may or may + * not support xHCI natively. 
That means that during system resume, it + * may switch the ports back to EHCI so that users can use their + * keyboard to select a kernel from GRUB after resume from hibernate. + * + * The BIOS is supposed to remember whether the OS had xHCI ports + * enabled before resume, and switch the ports back to xHCI when the + * BIOS/OS semaphore is written, but we all know we can't trust BIOS + * writers. + * + * Unconditionally switch the ports back to xHCI after a system resume. + * We can't tell whether the EHCI or xHCI controller will be resumed + * first, so we have to do the port switchover in both drivers. Writing + * a '1' to the port switchover registers should have no effect if the + * port was already switched over. + */ + if (usb_is_intel_switchable_xhci(pdev)) + usb_enable_xhci_ports(pdev); + retval = xhci_resume(xhci, hibernated); return retval; } diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 237a765f8d1..cc1485bfed3 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -167,12 +167,6 @@ static void inc_deq(struct xhci_hcd *xhci, struct xhci_ring *ring, bool consumer next = ring->dequeue; } addr = (unsigned long long) xhci_trb_virt_to_dma(ring->deq_seg, ring->dequeue); - if (ring == xhci->event_ring) - xhci_dbg(xhci, "Event ring deq = 0x%llx (DMA)\n", addr); - else if (ring == xhci->cmd_ring) - xhci_dbg(xhci, "Command ring deq = 0x%llx (DMA)\n", addr); - else - xhci_dbg(xhci, "Ring deq = 0x%llx (DMA)\n", addr); } /* @@ -248,12 +242,6 @@ static void inc_enq(struct xhci_hcd *xhci, struct xhci_ring *ring, next = ring->enqueue; } addr = (unsigned long long) xhci_trb_virt_to_dma(ring->enq_seg, ring->enqueue); - if (ring == xhci->event_ring) - xhci_dbg(xhci, "Event ring enq = 0x%llx (DMA)\n", addr); - else if (ring == xhci->cmd_ring) - xhci_dbg(xhci, "Command ring enq = 0x%llx (DMA)\n", addr); - else - xhci_dbg(xhci, "Ring enq = 0x%llx (DMA)\n", addr); } /* @@ -636,13 +624,11 @@ static void 
xhci_giveback_urb_in_irq(struct xhci_hcd *xhci, } } usb_hcd_unlink_urb_from_ep(hcd, urb); - xhci_dbg(xhci, "Giveback %s URB %p\n", adjective, urb); spin_unlock(&xhci->lock); usb_hcd_giveback_urb(hcd, urb, status); xhci_urb_free_priv(xhci, urb_priv); spin_lock(&xhci->lock); - xhci_dbg(xhci, "%s URB given back\n", adjective); } } @@ -692,6 +678,8 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci, if (list_empty(&ep->cancelled_td_list)) { xhci_stop_watchdog_timer_in_irq(xhci, ep); + ep->stopped_td = NULL; + ep->stopped_trb = NULL; ring_doorbell_for_active_rings(xhci, slot_id, ep_index); return; } @@ -1093,8 +1081,13 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, complete(&xhci->addr_dev); break; case TRB_TYPE(TRB_DISABLE_SLOT): - if (xhci->devs[slot_id]) + if (xhci->devs[slot_id]) { + if (xhci->quirks & XHCI_EP_LIMIT_QUIRK) + /* Delete default control endpoint resources */ + xhci_free_device_endpoint_resources(xhci, + xhci->devs[slot_id], true); xhci_free_virt_device(xhci, slot_id); + } break; case TRB_TYPE(TRB_CONFIG_EP): virt_dev = xhci->devs[slot_id]; @@ -1630,7 +1623,6 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td, "without IOC set??\n"); *status = -ESHUTDOWN; } else { - xhci_dbg(xhci, "Successful control transfer!\n"); *status = 0; } break; @@ -1727,7 +1719,6 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_td *td, switch (trb_comp_code) { case COMP_SUCCESS: frame->status = 0; - xhci_dbg(xhci, "Successful isoc transfer!\n"); break; case COMP_SHORT_TX: frame->status = td->urb->transfer_flags & URB_SHORT_NOT_OK ? 
@@ -1837,12 +1828,6 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, else *status = 0; } else { - if (usb_endpoint_xfer_bulk(&td->urb->ep->desc)) - xhci_dbg(xhci, "Successful bulk " - "transfer!\n"); - else - xhci_dbg(xhci, "Successful interrupt " - "transfer!\n"); *status = 0; } break; @@ -1856,11 +1841,12 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, /* Others already handled above */ break; } - xhci_dbg(xhci, "ep %#x - asked for %d bytes, " - "%d bytes untransferred\n", - td->urb->ep->desc.bEndpointAddress, - td->urb->transfer_buffer_length, - TRB_LEN(le32_to_cpu(event->transfer_len))); + if (trb_comp_code == COMP_SHORT_TX) + xhci_dbg(xhci, "ep %#x - asked for %d bytes, " + "%d bytes untransferred\n", + td->urb->ep->desc.bEndpointAddress, + td->urb->transfer_buffer_length, + TRB_LEN(le32_to_cpu(event->transfer_len))); /* Fast path - was this the last TRB in the TD for this URB? */ if (event_trb == td->last_trb) { if (TRB_LEN(le32_to_cpu(event->transfer_len)) != 0) { @@ -1954,7 +1940,6 @@ static int handle_tx_event(struct xhci_hcd *xhci, /* Endpoint ID is 1 based, our index is zero based */ ep_index = TRB_TO_EP_ID(le32_to_cpu(event->flags)) - 1; - xhci_dbg(xhci, "%s - ep index = %d\n", __func__, ep_index); ep = &xdev->eps[ep_index]; ep_ring = xhci_dma_to_transfer_ring(ep, le64_to_cpu(event->buffer)); ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index); @@ -2081,6 +2066,16 @@ static int handle_tx_event(struct xhci_hcd *xhci, if (!event_seg) { if (!ep->skip || !usb_endpoint_xfer_isoc(&td->urb->ep->desc)) { + /* Some host controllers give a spurious + * successful event after a short transfer. + * Ignore it. + */ + if ((xhci->quirks & XHCI_SPURIOUS_SUCCESS) && + ep_ring->last_td_was_short) { + ep_ring->last_td_was_short = false; + ret = 0; + goto cleanup; + } /* HC is busted, give up! 
*/ xhci_err(xhci, "ERROR Transfer event TRB DMA ptr not " @@ -2091,6 +2086,10 @@ static int handle_tx_event(struct xhci_hcd *xhci, ret = skip_isoc_td(xhci, td, event, ep, &status); goto cleanup; } + if (trb_comp_code == COMP_SHORT_TX) + ep_ring->last_td_was_short = true; + else + ep_ring->last_td_was_short = false; if (ep->skip) { xhci_dbg(xhci, "Found td. Clear skip flag.\n"); @@ -2149,9 +2148,15 @@ cleanup: xhci_urb_free_priv(xhci, urb_priv); usb_hcd_unlink_urb_from_ep(bus_to_hcd(urb->dev->bus), urb); - xhci_dbg(xhci, "Giveback URB %p, len = %d, " - "status = %d\n", - urb, urb->actual_length, status); + if ((urb->actual_length != urb->transfer_buffer_length && + (urb->transfer_flags & + URB_SHORT_NOT_OK)) || + status != 0) + xhci_dbg(xhci, "Giveback URB %p, len = %d, " + "expected = %x, status = %d\n", + urb, urb->actual_length, + urb->transfer_buffer_length, + status); spin_unlock(&xhci->lock); usb_hcd_giveback_urb(bus_to_hcd(urb->dev->bus), urb, status); spin_lock(&xhci->lock); @@ -2180,7 +2185,6 @@ static int xhci_handle_event(struct xhci_hcd *xhci) int update_ptrs = 1; int ret; - xhci_dbg(xhci, "In %s\n", __func__); if (!xhci->event_ring || !xhci->event_ring->dequeue) { xhci->error_bitmask |= 1 << 1; return 0; @@ -2193,7 +2197,6 @@ static int xhci_handle_event(struct xhci_hcd *xhci) xhci->error_bitmask |= 1 << 2; return 0; } - xhci_dbg(xhci, "%s - OS owns TRB\n", __func__); /* * Barrier between reading the TRB_CYCLE (valid) flag above and any @@ -2203,20 +2206,14 @@ static int xhci_handle_event(struct xhci_hcd *xhci) /* FIXME: Handle more event types. 
*/ switch ((le32_to_cpu(event->event_cmd.flags) & TRB_TYPE_BITMASK)) { case TRB_TYPE(TRB_COMPLETION): - xhci_dbg(xhci, "%s - calling handle_cmd_completion\n", __func__); handle_cmd_completion(xhci, &event->event_cmd); - xhci_dbg(xhci, "%s - returned from handle_cmd_completion\n", __func__); break; case TRB_TYPE(TRB_PORT_STATUS): - xhci_dbg(xhci, "%s - calling handle_port_status\n", __func__); handle_port_status(xhci, event); - xhci_dbg(xhci, "%s - returned from handle_port_status\n", __func__); update_ptrs = 0; break; case TRB_TYPE(TRB_TRANSFER): - xhci_dbg(xhci, "%s - calling handle_tx_event\n", __func__); ret = handle_tx_event(xhci, &event->trans_event); - xhci_dbg(xhci, "%s - returned from handle_tx_event\n", __func__); if (ret < 0) xhci->error_bitmask |= 1 << 9; else @@ -2273,16 +2270,6 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) spin_unlock(&xhci->lock); return IRQ_NONE; } - xhci_dbg(xhci, "op reg status = %08x\n", status); - xhci_dbg(xhci, "Event ring dequeue ptr:\n"); - xhci_dbg(xhci, "@%llx %08x %08x %08x %08x\n", - (unsigned long long) - xhci_trb_virt_to_dma(xhci->event_ring->deq_seg, trb), - lower_32_bits(le64_to_cpu(trb->link.segment_ptr)), - upper_32_bits(le64_to_cpu(trb->link.segment_ptr)), - (unsigned int) le32_to_cpu(trb->link.intr_target), - (unsigned int) le32_to_cpu(trb->link.control)); - if (status & STS_FATAL) { xhci_warn(xhci, "WARNING: Host System Error\n"); xhci_halt(xhci); @@ -2397,7 +2384,6 @@ static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, u32 ep_state, unsigned int num_trbs, gfp_t mem_flags) { /* Make sure the endpoint has been added to xHC schedule */ - xhci_dbg(xhci, "Endpoint state = 0x%x\n", ep_state); switch (ep_state) { case EP_STATE_DISABLED: /* @@ -2434,7 +2420,6 @@ static int prepare_ring(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, struct xhci_ring *ring = ep_ring; union xhci_trb *next; - xhci_dbg(xhci, "prepare_ring: pointing to link trb\n"); next = ring->enqueue; while (last_trb(xhci, ring, 
ring->enq_seg, next)) { diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 8f2a56ece44..d9660eb97eb 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1314,8 +1314,10 @@ int xhci_drop_endpoint(struct usb_hcd *hcd, struct usb_device *udev, if (ret <= 0) return ret; xhci = hcd_to_xhci(hcd); - xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev); + if (xhci->xhc_state & XHCI_STATE_DYING) + return -ENODEV; + xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev); drop_flag = xhci_get_endpoint_flag(&ep->desc); if (drop_flag == SLOT_FLAG || drop_flag == EP0_FLAG) { xhci_dbg(xhci, "xHCI %s - can't drop slot or ep 0 %#x\n", @@ -1401,6 +1403,8 @@ int xhci_add_endpoint(struct usb_hcd *hcd, struct usb_device *udev, return ret; } xhci = hcd_to_xhci(hcd); + if (xhci->xhc_state & XHCI_STATE_DYING) + return -ENODEV; added_ctxs = xhci_get_endpoint_flag(&ep->desc); last_ctx = xhci_last_valid_endpoint(added_ctxs); @@ -1578,6 +1582,113 @@ static int xhci_evaluate_context_result(struct xhci_hcd *xhci, return ret; } +static u32 xhci_count_num_new_endpoints(struct xhci_hcd *xhci, + struct xhci_container_ctx *in_ctx) +{ + struct xhci_input_control_ctx *ctrl_ctx; + u32 valid_add_flags; + u32 valid_drop_flags; + + ctrl_ctx = xhci_get_input_control_ctx(xhci, in_ctx); + /* Ignore the slot flag (bit 0), and the default control endpoint flag + * (bit 1). The default control endpoint is added during the Address + * Device command and is never removed until the slot is disabled. + */ + valid_add_flags = ctrl_ctx->add_flags >> 2; + valid_drop_flags = ctrl_ctx->drop_flags >> 2; + + /* Use hweight32 to count the number of ones in the add flags, or + * number of endpoints added. Don't count endpoints that are changed + * (both added and dropped). 
+ */ + return hweight32(valid_add_flags) - + hweight32(valid_add_flags & valid_drop_flags); +} + +static unsigned int xhci_count_num_dropped_endpoints(struct xhci_hcd *xhci, + struct xhci_container_ctx *in_ctx) +{ + struct xhci_input_control_ctx *ctrl_ctx; + u32 valid_add_flags; + u32 valid_drop_flags; + + ctrl_ctx = xhci_get_input_control_ctx(xhci, in_ctx); + valid_add_flags = ctrl_ctx->add_flags >> 2; + valid_drop_flags = ctrl_ctx->drop_flags >> 2; + + return hweight32(valid_drop_flags) - + hweight32(valid_add_flags & valid_drop_flags); +} + +/* + * We need to reserve the new number of endpoints before the configure endpoint + * command completes. We can't subtract the dropped endpoints from the number + * of active endpoints until the command completes because we can oversubscribe + * the host in this case: + * + * - the first configure endpoint command drops more endpoints than it adds + * - a second configure endpoint command that adds more endpoints is queued + * - the first configure endpoint command fails, so the config is unchanged + * - the second command may succeed, even though there isn't enough resources + * + * Must be called with xhci->lock held. + */ +static int xhci_reserve_host_resources(struct xhci_hcd *xhci, + struct xhci_container_ctx *in_ctx) +{ + u32 added_eps; + + added_eps = xhci_count_num_new_endpoints(xhci, in_ctx); + if (xhci->num_active_eps + added_eps > xhci->limit_active_eps) { + xhci_dbg(xhci, "Not enough ep ctxs: " + "%u active, need to add %u, limit is %u.\n", + xhci->num_active_eps, added_eps, + xhci->limit_active_eps); + return -ENOMEM; + } + xhci->num_active_eps += added_eps; + xhci_dbg(xhci, "Adding %u ep ctxs, %u now active.\n", added_eps, + xhci->num_active_eps); + return 0; +} + +/* + * The configure endpoint was failed by the xHC for some other reason, so we + * need to revert the resources that failed configuration would have used. + * + * Must be called with xhci->lock held. 
+ */ +static void xhci_free_host_resources(struct xhci_hcd *xhci, + struct xhci_container_ctx *in_ctx) +{ + u32 num_failed_eps; + + num_failed_eps = xhci_count_num_new_endpoints(xhci, in_ctx); + xhci->num_active_eps -= num_failed_eps; + xhci_dbg(xhci, "Removing %u failed ep ctxs, %u now active.\n", + num_failed_eps, + xhci->num_active_eps); +} + +/* + * Now that the command has completed, clean up the active endpoint count by + * subtracting out the endpoints that were dropped (but not changed). + * + * Must be called with xhci->lock held. + */ +static void xhci_finish_resource_reservation(struct xhci_hcd *xhci, + struct xhci_container_ctx *in_ctx) +{ + u32 num_dropped_eps; + + num_dropped_eps = xhci_count_num_dropped_endpoints(xhci, in_ctx); + xhci->num_active_eps -= num_dropped_eps; + if (num_dropped_eps) + xhci_dbg(xhci, "Removing %u dropped ep ctxs, %u now active.\n", + num_dropped_eps, + xhci->num_active_eps); +} + /* Issue a configure endpoint command or evaluate context command * and wait for it to finish. 
*/ @@ -1598,6 +1709,15 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, virt_dev = xhci->devs[udev->slot_id]; if (command) { in_ctx = command->in_ctx; + if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK) && + xhci_reserve_host_resources(xhci, in_ctx)) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_warn(xhci, "Not enough host resources, " + "active endpoint contexts = %u\n", + xhci->num_active_eps); + return -ENOMEM; + } + cmd_completion = command->completion; cmd_status = &command->status; command->command_trb = xhci->cmd_ring->enqueue; @@ -1613,6 +1733,14 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, list_add_tail(&command->cmd_list, &virt_dev->cmd_list); } else { in_ctx = virt_dev->in_ctx; + if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK) && + xhci_reserve_host_resources(xhci, in_ctx)) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_warn(xhci, "Not enough host resources, " + "active endpoint contexts = %u\n", + xhci->num_active_eps); + return -ENOMEM; + } cmd_completion = &virt_dev->cmd_completion; cmd_status = &virt_dev->cmd_status; } @@ -1627,6 +1755,8 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, if (ret < 0) { if (command) list_del(&command->cmd_list); + if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) + xhci_free_host_resources(xhci, in_ctx); spin_unlock_irqrestore(&xhci->lock, flags); xhci_dbg(xhci, "FIXME allocate a new ring segment\n"); return -ENOMEM; @@ -1649,8 +1779,22 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, } if (!ctx_change) - return xhci_configure_endpoint_result(xhci, udev, cmd_status); - return xhci_evaluate_context_result(xhci, udev, cmd_status); + ret = xhci_configure_endpoint_result(xhci, udev, cmd_status); + else + ret = xhci_evaluate_context_result(xhci, udev, cmd_status); + + if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) { + spin_lock_irqsave(&xhci->lock, flags); + /* If the command failed, remove the reserved resources. + * Otherwise, clean up the estimate to include dropped eps. 
+ */ + if (ret) + xhci_free_host_resources(xhci, in_ctx); + else + xhci_finish_resource_reservation(xhci, in_ctx); + spin_unlock_irqrestore(&xhci->lock, flags); + } + return ret; } /* Called after one or more calls to xhci_add_endpoint() or @@ -1676,6 +1820,8 @@ int xhci_check_bandwidth(struct usb_hcd *hcd, struct usb_device *udev) if (ret <= 0) return ret; xhci = hcd_to_xhci(hcd); + if (xhci->xhc_state & XHCI_STATE_DYING) + return -ENODEV; xhci_dbg(xhci, "%s called for udev %p\n", __func__, udev); virt_dev = xhci->devs[udev->slot_id]; @@ -2266,6 +2412,34 @@ int xhci_free_streams(struct usb_hcd *hcd, struct usb_device *udev, } /* + * Deletes endpoint resources for endpoints that were active before a Reset + * Device command, or a Disable Slot command. The Reset Device command leaves + * the control endpoint intact, whereas the Disable Slot command deletes it. + * + * Must be called with xhci->lock held. + */ +void xhci_free_device_endpoint_resources(struct xhci_hcd *xhci, + struct xhci_virt_device *virt_dev, bool drop_control_ep) +{ + int i; + unsigned int num_dropped_eps = 0; + unsigned int drop_flags = 0; + + for (i = (drop_control_ep ? 0 : 1); i < 31; i++) { + if (virt_dev->eps[i].ring) { + drop_flags |= 1 << i; + num_dropped_eps++; + } + } + xhci->num_active_eps -= num_dropped_eps; + if (num_dropped_eps) + xhci_dbg(xhci, "Dropped %u ep ctxs, flags = 0x%x, " + "%u now active.\n", + num_dropped_eps, drop_flags, + xhci->num_active_eps); +} + +/* * This submits a Reset Device Command, which will set the device state to 0, * set the device address to 0, and disable all the endpoints except the default * control endpoint. 
The USB core should come back and call @@ -2406,6 +2580,14 @@ int xhci_discover_or_reset_device(struct usb_hcd *hcd, struct usb_device *udev) goto command_cleanup; } + /* Free up host controller endpoint resources */ + if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) { + spin_lock_irqsave(&xhci->lock, flags); + /* Don't delete the default control endpoint resources */ + xhci_free_device_endpoint_resources(xhci, virt_dev, false); + spin_unlock_irqrestore(&xhci->lock, flags); + } + /* Everything but endpoint 0 is disabled, so free or cache the rings. */ last_freed_endpoint = 1; for (i = 1; i < 31; ++i) { @@ -2479,6 +2661,27 @@ void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) } /* + * Checks if we have enough host controller resources for the default control + * endpoint. + * + * Must be called with xhci->lock held. + */ +static int xhci_reserve_host_control_ep_resources(struct xhci_hcd *xhci) +{ + if (xhci->num_active_eps + 1 > xhci->limit_active_eps) { + xhci_dbg(xhci, "Not enough ep ctxs: " + "%u active, need to add 1, limit is %u.\n", + xhci->num_active_eps, xhci->limit_active_eps); + return -ENOMEM; + } + xhci->num_active_eps += 1; + xhci_dbg(xhci, "Adding 1 ep ctx, %u now active.\n", + xhci->num_active_eps); + return 0; +} + + +/* * Returns 0 if the xHC ran out of device slots, the Enable Slot command * timed out, or allocating memory failed. Returns 1 on success. */ @@ -2513,24 +2716,39 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) xhci_err(xhci, "Error while assigning device slot ID\n"); return 0; } - /* xhci_alloc_virt_device() does not touch rings; no need to lock. 
- * Use GFP_NOIO, since this function can be called from + + if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) { + spin_lock_irqsave(&xhci->lock, flags); + ret = xhci_reserve_host_control_ep_resources(xhci); + if (ret) { + spin_unlock_irqrestore(&xhci->lock, flags); + xhci_warn(xhci, "Not enough host resources, " + "active endpoint contexts = %u\n", + xhci->num_active_eps); + goto disable_slot; + } + spin_unlock_irqrestore(&xhci->lock, flags); + } + /* Use GFP_NOIO, since this function can be called from * xhci_discover_or_reset_device(), which may be called as part of * mass storage driver error handling. */ if (!xhci_alloc_virt_device(xhci, xhci->slot_id, udev, GFP_NOIO)) { - /* Disable slot, if we can do it without mem alloc */ xhci_warn(xhci, "Could not allocate xHCI USB device data structures\n"); - spin_lock_irqsave(&xhci->lock, flags); - if (!xhci_queue_slot_control(xhci, TRB_DISABLE_SLOT, udev->slot_id)) - xhci_ring_cmd_db(xhci); - spin_unlock_irqrestore(&xhci->lock, flags); - return 0; + goto disable_slot; } udev->slot_id = xhci->slot_id; /* Is this a LS or FS device under a HS hub? */ /* Hub or peripherial? 
*/ return 1; + +disable_slot: + /* Disable slot, if we can do it without mem alloc */ + spin_lock_irqsave(&xhci->lock, flags); + if (!xhci_queue_slot_control(xhci, TRB_DISABLE_SLOT, udev->slot_id)) + xhci_ring_cmd_db(xhci); + spin_unlock_irqrestore(&xhci->lock, flags); + return 0; } /* diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index e12db7cfb9b..ac0196e7fcf 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1123,6 +1123,7 @@ struct xhci_ring { */ u32 cycle_state; unsigned int stream_id; + bool last_td_was_short; }; struct xhci_erst_entry { @@ -1290,6 +1291,19 @@ struct xhci_hcd { #define XHCI_RESET_EP_QUIRK (1 << 1) #define XHCI_NEC_HOST (1 << 2) #define XHCI_AMD_PLL_FIX (1 << 3) +#define XHCI_SPURIOUS_SUCCESS (1 << 4) +/* + * Certain Intel host controllers have a limit to the number of endpoint + * contexts they can handle. Ideally, they would signal that they can't handle + * anymore endpoint contexts by returning a Resource Error for the Configure + * Endpoint command, but they don't. Instead they expect software to keep track + * of the number of active endpoints for them, across configure endpoint + * commands, reset device commands, disable slot commands, and address device + * commands. + */ +#define XHCI_EP_LIMIT_QUIRK (1 << 5) + unsigned int num_active_eps; + unsigned int limit_active_eps; /* There are two roothubs to keep track of bus suspend info for */ struct xhci_bus_state bus_state[2]; /* Is each xHCI roothub port a USB 3.0, USB 2.0, or USB 1.1 port? 
*/ @@ -1338,9 +1352,6 @@ static inline unsigned int xhci_readl(const struct xhci_hcd *xhci, static inline void xhci_writel(struct xhci_hcd *xhci, const unsigned int val, __le32 __iomem *regs) { - xhci_dbg(xhci, - "`MEM_WRITE_DWORD(3'b000, 32'h%p, 32'h%0x, 4'hf);\n", - regs, val); writel(val, regs); } @@ -1368,9 +1379,6 @@ static inline void xhci_write_64(struct xhci_hcd *xhci, u32 val_lo = lower_32_bits(val); u32 val_hi = upper_32_bits(val); - xhci_dbg(xhci, - "`MEM_WRITE_DWORD(3'b000, 64'h%p, 64'h%0lx, 4'hf);\n", - regs, (long unsigned int) val); writel(val_lo, ptr); writel(val_hi, ptr + 1); } @@ -1439,6 +1447,8 @@ void xhci_setup_streams_ep_input_ctx(struct xhci_hcd *xhci, void xhci_setup_no_streams_ep_input_ctx(struct xhci_hcd *xhci, struct xhci_ep_ctx *ep_ctx, struct xhci_virt_ep *ep); +void xhci_free_device_endpoint_resources(struct xhci_hcd *xhci, + struct xhci_virt_device *virt_dev, bool drop_control_ep); struct xhci_ring *xhci_dma_to_transfer_ring( struct xhci_virt_ep *ep, u64 address); diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2f7c76a85e5..e224a92baa1 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -144,7 +144,7 @@ static void handle_tx(struct vhost_net *net) } mutex_lock(&vq->mutex); - vhost_disable_notify(vq); + vhost_disable_notify(&net->dev, vq); if (wmem < sock->sk->sk_sndbuf / 2) tx_poll_stop(net); @@ -166,8 +166,8 @@ static void handle_tx(struct vhost_net *net) set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); break; } - if (unlikely(vhost_enable_notify(vq))) { - vhost_disable_notify(vq); + if (unlikely(vhost_enable_notify(&net->dev, vq))) { + vhost_disable_notify(&net->dev, vq); continue; } break; @@ -315,7 +315,7 @@ static void handle_rx(struct vhost_net *net) return; mutex_lock(&vq->mutex); - vhost_disable_notify(vq); + vhost_disable_notify(&net->dev, vq); vhost_hlen = vq->vhost_hlen; sock_hlen = vq->sock_hlen; @@ -334,10 +334,10 @@ static void handle_rx(struct vhost_net *net) break; /* OK, now we need to know about 
added descriptors. */ if (!headcount) { - if (unlikely(vhost_enable_notify(vq))) { + if (unlikely(vhost_enable_notify(&net->dev, vq))) { /* They have slipped one in as we were * doing that: check again. */ - vhost_disable_notify(vq); + vhost_disable_notify(&net->dev, vq); continue; } /* Nothing new? Wait for eventfd to tell us diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 099f30230d0..734e1d74ad8 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -49,7 +49,7 @@ static void handle_vq(struct vhost_test *n) return; mutex_lock(&vq->mutex); - vhost_disable_notify(vq); + vhost_disable_notify(&n->dev, vq); for (;;) { head = vhost_get_vq_desc(&n->dev, vq, vq->iov, @@ -61,8 +61,8 @@ static void handle_vq(struct vhost_test *n) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { - if (unlikely(vhost_enable_notify(vq))) { - vhost_disable_notify(vq); + if (unlikely(vhost_enable_notify(&n->dev, vq))) { + vhost_disable_notify(&n->dev, vq); continue; } break; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 7aa4eea930f..ea966b35635 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -37,6 +37,9 @@ enum { VHOST_MEMORY_F_LOG = 0x1, }; +#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num]) +#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) + static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -161,6 +164,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->last_avail_idx = 0; vq->avail_idx = 0; vq->last_used_idx = 0; + vq->signalled_used = 0; + vq->signalled_used_valid = false; vq->used_flags = 0; vq->log_used = false; vq->log_addr = -1ull; @@ -489,16 +494,17 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, return 1; } -static int vq_access_ok(unsigned int num, +static int vq_access_ok(struct vhost_dev *d, unsigned int num, struct vring_desc __user *desc, struct vring_avail 
__user *avail, struct vring_used __user *used) { + size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return access_ok(VERIFY_READ, desc, num * sizeof *desc) && access_ok(VERIFY_READ, avail, - sizeof *avail + num * sizeof *avail->ring) && + sizeof *avail + num * sizeof *avail->ring + s) && access_ok(VERIFY_WRITE, used, - sizeof *used + num * sizeof *used->ring); + sizeof *used + num * sizeof *used->ring + s); } /* Can we log writes? */ @@ -514,9 +520,11 @@ int vhost_log_access_ok(struct vhost_dev *dev) /* Verify access for write logging. */ /* Caller should have vq mutex and device mutex */ -static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) +static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq, + void __user *log_base) { struct vhost_memory *mp; + size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; mp = rcu_dereference_protected(vq->dev->memory, lockdep_is_held(&vq->mutex)); @@ -524,15 +532,15 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && (!vq->log_used || log_access_ok(log_base, vq->log_addr, sizeof *vq->used + - vq->num * sizeof *vq->used->ring)); + vq->num * sizeof *vq->used->ring + s)); } /* Can we start vq? 
*/ /* Caller should have vq mutex and device mutex */ int vhost_vq_access_ok(struct vhost_virtqueue *vq) { - return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) && - vq_log_access_ok(vq, vq->log_base); + return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) && + vq_log_access_ok(vq->dev, vq, vq->log_base); } static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) @@ -577,6 +585,7 @@ static int init_used(struct vhost_virtqueue *vq, if (r) return r; + vq->signalled_used_valid = false; return get_user(vq->last_used_idx, &used->idx); } @@ -674,7 +683,7 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) * If it is not, we don't as size might not have been setup. * We will verify when backend is configured. */ if (vq->private_data) { - if (!vq_access_ok(vq->num, + if (!vq_access_ok(d, vq->num, (void __user *)(unsigned long)a.desc_user_addr, (void __user *)(unsigned long)a.avail_user_addr, (void __user *)(unsigned long)a.used_user_addr)) { @@ -818,7 +827,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg) vq = d->vqs + i; mutex_lock(&vq->mutex); /* If ring is inactive, will check when it's enabled. */ - if (vq->private_data && !vq_log_access_ok(vq, base)) + if (vq->private_data && !vq_log_access_ok(d, vq, base)) r = -EFAULT; else vq->log_base = base; @@ -1219,6 +1228,10 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, /* On success, increment avail index. */ vq->last_avail_idx++; + + /* Assume notifications from guest are disabled at this point, + * if they aren't we would need to update avail_event index. */ + BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); return head; } @@ -1267,6 +1280,12 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) eventfd_signal(vq->log_ctx, 1); } vq->last_used_idx++; + /* If the driver never bothers to signal in a very long while, + * used index might wrap around. 
If that happens, invalidate + * signalled_used index we stored. TODO: make sure driver + * signals at least once in 2^16 and remove this. */ + if (unlikely(vq->last_used_idx == vq->signalled_used)) + vq->signalled_used_valid = false; return 0; } @@ -1275,6 +1294,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, unsigned count) { struct vring_used_elem __user *used; + u16 old, new; int start; start = vq->last_used_idx % vq->num; @@ -1292,7 +1312,14 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, ((void __user *)used - (void __user *)vq->used), count * sizeof *used); } - vq->last_used_idx += count; + old = vq->last_used_idx; + new = (vq->last_used_idx += count); + /* If the driver never bothers to signal in a very long while, + * used index might wrap around. If that happens, invalidate + * signalled_used index we stored. TODO: make sure driver + * signals at least once in 2^16 and remove this. */ + if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) + vq->signalled_used_valid = false; return 0; } @@ -1331,29 +1358,47 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, return r; } -/* This actually signals the guest, using eventfd. */ -void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { - __u16 flags; - + __u16 old, new, event; + bool v; /* Flush out used index updates. This is paired * with the barrier that the Guest executes when enabling * interrupts. 
*/ smp_mb(); - if (__get_user(flags, &vq->avail->flags)) { - vq_err(vq, "Failed to get flags"); - return; + if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) && + unlikely(vq->avail_idx == vq->last_avail_idx)) + return true; + + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + __u16 flags; + if (__get_user(flags, &vq->avail->flags)) { + vq_err(vq, "Failed to get flags"); + return true; + } + return !(flags & VRING_AVAIL_F_NO_INTERRUPT); } + old = vq->signalled_used; + v = vq->signalled_used_valid; + new = vq->signalled_used = vq->last_used_idx; + vq->signalled_used_valid = true; - /* If they don't want an interrupt, don't signal, unless empty. */ - if ((flags & VRING_AVAIL_F_NO_INTERRUPT) && - (vq->avail_idx != vq->last_avail_idx || - !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY))) - return; + if (unlikely(!v)) + return true; + if (get_user(event, vhost_used_event(vq))) { + vq_err(vq, "Failed to get used event idx"); + return true; + } + return vring_need_event(event, new, old); +} + +/* This actually signals the guest, using eventfd. */ +void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ /* Signal the Guest tell them we used something up. */ - if (vq->call_ctx) + if (vq->call_ctx && vhost_notify(dev, vq)) eventfd_signal(vq->call_ctx, 1); } @@ -1376,7 +1421,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, } /* OK, now we need to know about added descriptors. 
*/ -bool vhost_enable_notify(struct vhost_virtqueue *vq) +bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { u16 avail_idx; int r; @@ -1384,11 +1429,34 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) return false; vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; - r = put_user(vq->used_flags, &vq->used->flags); - if (r) { - vq_err(vq, "Failed to enable notification at %p: %d\n", - &vq->used->flags, r); - return false; + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + r = put_user(vq->used_flags, &vq->used->flags); + if (r) { + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + return false; + } + } else { + r = put_user(vq->avail_idx, vhost_avail_event(vq)); + if (r) { + vq_err(vq, "Failed to update avail event index at %p: %d\n", + vhost_avail_event(vq), r); + return false; + } + } + if (unlikely(vq->log_used)) { + void __user *used; + /* Make sure data is seen before log. */ + smp_wmb(); + used = vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ? + &vq->used->flags : vhost_avail_event(vq); + /* Log used flags or event index entry write. Both are 16 bit + * fields. */ + log_write(vq->log_base, vq->log_addr + + (used - (void __user *)vq->used), + sizeof(u16)); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); } /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ @@ -1404,15 +1472,17 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) } /* We don't need to be notified again. 
*/ -void vhost_disable_notify(struct vhost_virtqueue *vq) +void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (vq->used_flags & VRING_USED_F_NO_NOTIFY) return; vq->used_flags |= VRING_USED_F_NO_NOTIFY; - r = put_user(vq->used_flags, &vq->used->flags); - if (r) - vq_err(vq, "Failed to enable notification at %p: %d\n", - &vq->used->flags, r); + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + r = put_user(vq->used_flags, &vq->used->flags); + if (r) + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + } } diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index b3363ae3851..8e03379dd30 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -84,6 +84,12 @@ struct vhost_virtqueue { /* Used flags */ u16 used_flags; + /* Last used index value we have signalled on */ + u16 signalled_used; + + /* Last used index value we have signalled on */ + bool signalled_used_valid; + /* Log writes to used structure. 
*/ bool log_used; u64 log_addr; @@ -149,8 +155,8 @@ void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *, void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); -void vhost_disable_notify(struct vhost_virtqueue *); -bool vhost_enable_notify(struct vhost_virtqueue *); +void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); +bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, unsigned int log_num, u64 len); @@ -162,11 +168,12 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, } while (0) enum { - VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) | - (1 << VIRTIO_RING_F_INDIRECT_DESC) | - (1 << VHOST_F_LOG_ALL) | - (1 << VHOST_NET_F_VIRTIO_NET_HDR) | - (1 << VIRTIO_NET_F_MRG_RXBUF), + VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | + (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX) | + (1ULL << VHOST_F_LOG_ALL) | + (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | + (1ULL << VIRTIO_NET_F_MRG_RXBUF), }; static inline int vhost_has_feature(struct vhost_dev *dev, int bit) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 0f1da45ba47..e058ace2a4a 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -40,9 +40,6 @@ struct virtio_balloon /* Waiting for host to ack the pages we released. */ struct completion acked; - /* Do we have to tell Host *before* we reuse pages? */ - bool tell_host_first; - /* The pages we've told the Host we're not using. 
*/ unsigned int num_pages; struct list_head pages; @@ -151,13 +148,14 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) vb->num_pages--; } - if (vb->tell_host_first) { - tell_host(vb, vb->deflate_vq); - release_pages_by_pfn(vb->pfns, vb->num_pfns); - } else { - release_pages_by_pfn(vb->pfns, vb->num_pfns); - tell_host(vb, vb->deflate_vq); - } + + /* + * Note that if + * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); + * is true, we *have* to do it in this order + */ + tell_host(vb, vb->deflate_vq); + release_pages_by_pfn(vb->pfns, vb->num_pfns); } static inline void update_stat(struct virtio_balloon *vb, int idx, @@ -325,9 +323,6 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_del_vqs; } - vb->tell_host_first - = virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); - return 0; out_del_vqs: diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b0043fb26a4..68b9136847a 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -82,6 +82,9 @@ struct vring_virtqueue /* Host supports indirect buffers */ bool indirect; + /* Host publishes avail event idx */ + bool event; + /* Number of free buffers */ unsigned int num_free; /* Head of free buffer list. */ @@ -237,18 +240,22 @@ EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp); void virtqueue_kick(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + u16 new, old; START_USE(vq); /* Descriptors and available array need to be set before we expose the * new available array entries. */ virtio_wmb(); - vq->vring.avail->idx += vq->num_added; + old = vq->vring.avail->idx; + new = vq->vring.avail->idx = old + vq->num_added; vq->num_added = 0; /* Need to update avail index before checking if we should notify */ virtio_mb(); - if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) + if (vq->event ? 
+ vring_need_event(vring_avail_event(&vq->vring), new, old) : + !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) /* Prod other side to tell it about changes. */ vq->notify(&vq->vq); @@ -324,6 +331,14 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) ret = vq->data[i]; detach_buf(vq, i); vq->last_used_idx++; + /* If we expect an interrupt for the next entry, tell host + * by writing event index and flush out the write before + * the read in the next get_buf call. */ + if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { + vring_used_event(&vq->vring) = vq->last_used_idx; + virtio_mb(); + } + END_USE(vq); return ret; } @@ -345,7 +360,11 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) /* We optimistically turn back on interrupts, then check if there was * more to do. */ + /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + vring_used_event(&vq->vring) = vq->last_used_idx; virtio_mb(); if (unlikely(more_used(vq))) { END_USE(vq); @@ -357,6 +376,33 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); +bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 bufs; + + START_USE(vq); + + /* We optimistically turn back on interrupts, then check if there was + * more to do. */ + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. 
*/ + vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + /* TODO: tune this threshold */ + bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; + vring_used_event(&vq->vring) = vq->last_used_idx + bufs; + virtio_mb(); + if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { + END_USE(vq); + return false; + } + + END_USE(vq); + return true; +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); + void *virtqueue_detach_unused_buf(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -438,6 +484,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, #endif vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); + vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); /* No callback? Tell other side not to bother us. */ if (!callback) @@ -472,6 +519,8 @@ void vring_transport_features(struct virtio_device *vdev) switch (i) { case VIRTIO_RING_F_INDIRECT_DESC: break; + case VIRTIO_RING_F_EVENT_IDX: + break; default: /* We don't understand this bit. 
*/ clear_bit(i, vdev->features); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 8d7f3e69ae2..7f6c6770319 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -814,7 +814,6 @@ int v9fs_vfs_unlink(struct inode *i, struct dentry *d) int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) { - dentry_unhash(d); return v9fs_remove(i, d, 1); } @@ -840,9 +839,6 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct p9_fid *newdirfid; struct p9_wstat wstat; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - P9_DPRINTK(P9_DEBUG_VFS, "\n"); retval = 0; old_inode = old_dentry->d_inode; diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 03330e2e390..e3e9efc1fdd 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -320,8 +320,6 @@ affs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_ino, (int)dentry->d_name.len, dentry->d_name.name); - dentry_unhash(dentry); - return affs_remove_header(dentry); } @@ -419,9 +417,6 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, struct buffer_head *bh = NULL; int retval; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - pr_debug("AFFS: rename(old=%u,\"%*s\" to new=%u,\"%*s\")\n", (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 2c4e0516004..20c106f2492 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -845,8 +845,6 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) _enter("{%x:%u},{%s}", dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name); - dentry_unhash(dentry); - ret = -ENAMETOOLONG; if (dentry->d_name.len >= AFSNAMEMAX) goto error; @@ -1148,9 +1146,6 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, struct key *key; int ret; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) 
- dentry_unhash(new_dentry); - vnode = AFS_FS_I(old_dentry->d_inode); orig_dvnode = AFS_FS_I(old_dir); new_dvnode = AFS_FS_I(new_dir); diff --git a/fs/attr.c b/fs/attr.c index 91dbe2a107f..caf2aa521e2 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -175,6 +175,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr) return -EPERM; } + if ((ia_valid & ATTR_MODE)) { + mode_t amode = attr->ia_mode; + /* Flag setting protected by i_mutex */ + if (is_sxid(amode)) + inode->i_flags &= ~S_NOSEC; + } + now = current_fs_time(inode->i_sb); attr->ia_ctime = now; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 87d95a8cddb..f55ae23b137 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -583,8 +583,6 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) return -EACCES; - dentry_unhash(dentry); - if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); if (p_ino && dentry->d_parent != dentry) diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index c7d1d06b048..b14cebfd904 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -224,9 +224,6 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct bfs_sb_info *info; int error = -ENOENT; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - old_bh = new_bh = NULL; old_inode = old_dentry->d_inode; if (S_ISDIR(old_inode->i_mode)) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 332323e19dd..6c093fa98f6 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2524,7 +2524,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); -void btrfs_dirty_inode(struct inode *inode); +void btrfs_dirty_inode(struct inode *inode, int flags); struct inode 
*btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bb51bb1fa44..39a9d5750ef 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4294,7 +4294,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. */ -void btrfs_dirty_inode(struct inode *inode) +void btrfs_dirty_inode(struct inode *inode, int flags) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; diff --git a/fs/buffer.c b/fs/buffer.c index 698c6b2cc46..49c9aada037 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2382,6 +2382,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, ret = -EAGAIN; goto out_unlock; } + wait_on_page_writeback(page); return 0; out_unlock: unlock_page(page); diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 8f1700623b4..21de1d6d584 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -74,8 +74,9 @@ shrink_idmap_tree(struct rb_root *root, int nr_to_scan, int *nr_rem, * Run idmap cache shrinker. 
*/ static int -cifs_idmap_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) +cifs_idmap_shrinker(struct shrinker *shrink, struct shrink_control *sc) { + int nr_to_scan = sc->nr_to_scan; int nr_del = 0; int nr_rem = 0; struct rb_root *root; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index a46126fd573..2b8dae4d121 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -336,8 +336,6 @@ static int coda_rmdir(struct inode *dir, struct dentry *de) int len = de->d_name.len; int error; - dentry_unhash(de); - error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); if (!error) { /* VFS may delete the child */ @@ -361,9 +359,6 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, int new_length = new_dentry->d_name.len; int error; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), coda_i2f(new_dir), old_length, new_length, (const char *) old_name, (const char *)new_name); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 9d17d350abc..9a37a9b6de3 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1359,8 +1359,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; - dentry_unhash(dentry); - if (dentry->d_parent == configfs_sb->s_root) return -EPERM; diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index b8d5c809102..58609bde3b9 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1024,25 +1024,25 @@ out: } /** - * contains_ecryptfs_marker - check for the ecryptfs marker + * ecryptfs_validate_marker - check for the ecryptfs marker * @data: The data block in which to check * - * Returns one if marker found; zero if not found + * Returns zero if marker found; -EINVAL if not found */ -static int contains_ecryptfs_marker(char *data) +static int ecryptfs_validate_marker(char *data) { u32 m_1, m_2; m_1 = get_unaligned_be32(data); 
m_2 = get_unaligned_be32(data + 4); if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2) - return 1; + return 0; ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; " "MAGIC_ECRYPTFS_MARKER = [0x%.8x]\n", m_1, m_2, MAGIC_ECRYPTFS_MARKER); ecryptfs_printk(KERN_DEBUG, "(m_1 ^ MAGIC_ECRYPTFS_MARKER) = " "[0x%.8x]\n", (m_1 ^ MAGIC_ECRYPTFS_MARKER)); - return 0; + return -EINVAL; } struct ecryptfs_flag_map_elem { @@ -1201,27 +1201,19 @@ int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code) return rc; } -int ecryptfs_read_and_validate_header_region(char *data, - struct inode *ecryptfs_inode) +int ecryptfs_read_and_validate_header_region(struct inode *inode) { - struct ecryptfs_crypt_stat *crypt_stat = - &(ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat); + u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES]; + u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES; int rc; - if (crypt_stat->extent_size == 0) - crypt_stat->extent_size = ECRYPTFS_DEFAULT_EXTENT_SIZE; - rc = ecryptfs_read_lower(data, 0, crypt_stat->extent_size, - ecryptfs_inode); - if (rc < 0) { - printk(KERN_ERR "%s: Error reading header region; rc = [%d]\n", - __func__, rc); - goto out; - } - if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES)) { - rc = -EINVAL; - } else - rc = 0; -out: + rc = ecryptfs_read_lower(file_size, 0, ECRYPTFS_SIZE_AND_MARKER_BYTES, + inode); + if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) + return rc >= 0 ? 
-EINVAL : rc; + rc = ecryptfs_validate_marker(marker); + if (!rc) + ecryptfs_i_size_init(file_size, inode); return rc; } @@ -1242,8 +1234,7 @@ ecryptfs_write_header_metadata(char *virt, (*written) = 6; } -struct kmem_cache *ecryptfs_header_cache_1; -struct kmem_cache *ecryptfs_header_cache_2; +struct kmem_cache *ecryptfs_header_cache; /** * ecryptfs_write_headers_virt @@ -1496,11 +1487,9 @@ static int ecryptfs_read_headers_virt(char *page_virt, crypt_stat->mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; offset = ECRYPTFS_FILE_SIZE_BYTES; - rc = contains_ecryptfs_marker(page_virt + offset); - if (rc == 0) { - rc = -EINVAL; + rc = ecryptfs_validate_marker(page_virt + offset); + if (rc) goto out; - } if (!(crypt_stat->flags & ECRYPTFS_I_SIZE_INITIALIZED)) ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode); offset += MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; @@ -1567,20 +1556,21 @@ out: return rc; } -int ecryptfs_read_and_validate_xattr_region(char *page_virt, - struct dentry *ecryptfs_dentry) +int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, + struct inode *inode) { + u8 file_size[ECRYPTFS_SIZE_AND_MARKER_BYTES]; + u8 *marker = file_size + ECRYPTFS_FILE_SIZE_BYTES; int rc; - rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_dentry->d_inode); - if (rc) - goto out; - if (!contains_ecryptfs_marker(page_virt + ECRYPTFS_FILE_SIZE_BYTES)) { - printk(KERN_WARNING "Valid data found in [%s] xattr, but " - "the marker is invalid\n", ECRYPTFS_XATTR_NAME); - rc = -EINVAL; - } -out: + rc = ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry), + ECRYPTFS_XATTR_NAME, file_size, + ECRYPTFS_SIZE_AND_MARKER_BYTES); + if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) + return rc >= 0 ? 
-EINVAL : rc; + rc = ecryptfs_validate_marker(marker); + if (!rc) + ecryptfs_i_size_init(file_size, inode); return rc; } @@ -1610,7 +1600,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat, mount_crypt_stat); /* Read the first page from the underlying file */ - page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER); + page_virt = kmem_cache_alloc(ecryptfs_header_cache, GFP_USER); if (!page_virt) { rc = -ENOMEM; printk(KERN_ERR "%s: Unable to allocate page_virt\n", @@ -1655,7 +1645,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry) out: if (page_virt) { memset(page_virt, 0, PAGE_CACHE_SIZE); - kmem_cache_free(ecryptfs_header_cache_1, page_virt); + kmem_cache_free(ecryptfs_header_cache, page_virt); } return rc; } diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index e70282775e2..43c7c43b06f 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -200,6 +200,8 @@ ecryptfs_get_key_payload_data(struct key *key) #define MAGIC_ECRYPTFS_MARKER 0x3c81b7f5 #define MAGIC_ECRYPTFS_MARKER_SIZE_BYTES 8 /* 4*2 */ #define ECRYPTFS_FILE_SIZE_BYTES (sizeof(u64)) +#define ECRYPTFS_SIZE_AND_MARKER_BYTES (ECRYPTFS_FILE_SIZE_BYTES \ + + MAGIC_ECRYPTFS_MARKER_SIZE_BYTES) #define ECRYPTFS_DEFAULT_CIPHER "aes" #define ECRYPTFS_DEFAULT_KEY_BYTES 16 #define ECRYPTFS_DEFAULT_HASH "md5" @@ -603,8 +605,7 @@ extern struct kmem_cache *ecryptfs_file_info_cache; extern struct kmem_cache *ecryptfs_dentry_info_cache; extern struct kmem_cache *ecryptfs_inode_info_cache; extern struct kmem_cache *ecryptfs_sb_info_cache; -extern struct kmem_cache *ecryptfs_header_cache_1; -extern struct kmem_cache *ecryptfs_header_cache_2; +extern struct kmem_cache *ecryptfs_header_cache; extern struct kmem_cache *ecryptfs_xattr_cache; extern struct kmem_cache *ecryptfs_key_record_cache; extern struct kmem_cache *ecryptfs_key_sig_cache; @@ -625,14 +626,9 @@ struct 
ecryptfs_open_req { struct list_head kthread_ctl_list; }; -#define ECRYPTFS_INTERPOSE_FLAG_D_ADD 0x00000001 -int ecryptfs_interpose(struct dentry *hidden_dentry, - struct dentry *this_dentry, struct super_block *sb, - u32 flags); +struct inode *ecryptfs_get_inode(struct inode *lower_inode, + struct super_block *sb); void ecryptfs_i_size_init(const char *page_virt, struct inode *inode); -int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, - struct dentry *lower_dentry, - struct inode *ecryptfs_dir_inode); int ecryptfs_decode_and_decrypt_filename(char **decrypted_name, size_t *decrypted_name_size, struct dentry *ecryptfs_dentry, @@ -664,10 +660,9 @@ int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry); void ecryptfs_write_crypt_stat_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat, size_t *written); -int ecryptfs_read_and_validate_header_region(char *data, - struct inode *ecryptfs_inode); -int ecryptfs_read_and_validate_xattr_region(char *page_virt, - struct dentry *ecryptfs_dentry); +int ecryptfs_read_and_validate_header_region(struct inode *inode); +int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, + struct inode *inode); u8 ecryptfs_code_for_cipher_string(char *cipher_name, size_t key_bytes); int ecryptfs_cipher_code_to_string(char *str, u8 cipher_code); void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat); @@ -679,9 +674,6 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, unsigned char *src, struct dentry *ecryptfs_dentry); int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); -int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode); -int ecryptfs_inode_set(struct inode *inode, void *lower_inode); -void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode); ssize_t ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, void *value, size_t size); @@ -761,7 +753,7 @@ int ecryptfs_privileged_open(struct file 
**lower_file, struct dentry *lower_dentry, struct vfsmount *lower_mnt, const struct cred *cred); -int ecryptfs_get_lower_file(struct dentry *ecryptfs_dentry); +int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode); void ecryptfs_put_lower_file(struct inode *inode); int ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes, diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 566e5472f78..4ec9eb00a24 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -191,7 +191,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file) | ECRYPTFS_ENCRYPTED); } mutex_unlock(&crypt_stat->cs_mutex); - rc = ecryptfs_get_lower_file(ecryptfs_dentry); + rc = ecryptfs_get_lower_file(ecryptfs_dentry, inode); if (rc) { printk(KERN_ERR "%s: Error attempting to initialize " "the lower file for the dentry with name " diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index bc116b9ffcf..7349ade17de 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -51,6 +51,97 @@ static void unlock_dir(struct dentry *dir) dput(dir); } +static int ecryptfs_inode_test(struct inode *inode, void *lower_inode) +{ + if (ecryptfs_inode_to_lower(inode) == (struct inode *)lower_inode) + return 1; + return 0; +} + +static int ecryptfs_inode_set(struct inode *inode, void *opaque) +{ + struct inode *lower_inode = opaque; + + ecryptfs_set_inode_lower(inode, lower_inode); + fsstack_copy_attr_all(inode, lower_inode); + /* i_size will be overwritten for encrypted regular files */ + fsstack_copy_inode_size(inode, lower_inode); + inode->i_ino = lower_inode->i_ino; + inode->i_version++; + inode->i_mapping->a_ops = &ecryptfs_aops; + + if (S_ISLNK(inode->i_mode)) + inode->i_op = &ecryptfs_symlink_iops; + else if (S_ISDIR(inode->i_mode)) + inode->i_op = &ecryptfs_dir_iops; + else + inode->i_op = &ecryptfs_main_iops; + + if (S_ISDIR(inode->i_mode)) + inode->i_fop = &ecryptfs_dir_fops; + else if (special_file(inode->i_mode)) + init_special_inode(inode, 
inode->i_mode, inode->i_rdev); + else + inode->i_fop = &ecryptfs_main_fops; + + return 0; +} + +static struct inode *__ecryptfs_get_inode(struct inode *lower_inode, + struct super_block *sb) +{ + struct inode *inode; + + if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) + return ERR_PTR(-EXDEV); + if (!igrab(lower_inode)) + return ERR_PTR(-ESTALE); + inode = iget5_locked(sb, (unsigned long)lower_inode, + ecryptfs_inode_test, ecryptfs_inode_set, + lower_inode); + if (!inode) { + iput(lower_inode); + return ERR_PTR(-EACCES); + } + if (!(inode->i_state & I_NEW)) + iput(lower_inode); + + return inode; +} + +struct inode *ecryptfs_get_inode(struct inode *lower_inode, + struct super_block *sb) +{ + struct inode *inode = __ecryptfs_get_inode(lower_inode, sb); + + if (!IS_ERR(inode) && (inode->i_state & I_NEW)) + unlock_new_inode(inode); + + return inode; +} + +/** + * ecryptfs_interpose + * @lower_dentry: Existing dentry in the lower filesystem + * @dentry: ecryptfs' dentry + * @sb: ecryptfs's super_block + * + * Interposes upper and lower dentries. 
+ * + * Returns zero on success; non-zero otherwise + */ +static int ecryptfs_interpose(struct dentry *lower_dentry, + struct dentry *dentry, struct super_block *sb) +{ + struct inode *inode = ecryptfs_get_inode(lower_dentry->d_inode, sb); + + if (IS_ERR(inode)) + return PTR_ERR(inode); + d_instantiate(dentry, inode); + + return 0; +} + /** * ecryptfs_create_underlying_file * @lower_dir_inode: inode of the parent in the lower fs of the new file @@ -129,7 +220,7 @@ ecryptfs_do_create(struct inode *directory_inode, goto out_lock; } rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, - directory_inode->i_sb, 0); + directory_inode->i_sb); if (rc) { ecryptfs_printk(KERN_ERR, "Failure in ecryptfs_interpose\n"); goto out_lock; @@ -168,7 +259,8 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry) "context; rc = [%d]\n", rc); goto out; } - rc = ecryptfs_get_lower_file(ecryptfs_dentry); + rc = ecryptfs_get_lower_file(ecryptfs_dentry, + ecryptfs_dentry->d_inode); if (rc) { printk(KERN_ERR "%s: Error attempting to initialize " "the lower file for the dentry with name " @@ -215,102 +307,90 @@ out: return rc; } +static int ecryptfs_i_size_read(struct dentry *dentry, struct inode *inode) +{ + struct ecryptfs_crypt_stat *crypt_stat; + int rc; + + rc = ecryptfs_get_lower_file(dentry, inode); + if (rc) { + printk(KERN_ERR "%s: Error attempting to initialize " + "the lower file for the dentry with name " + "[%s]; rc = [%d]\n", __func__, + dentry->d_name.name, rc); + return rc; + } + + crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat; + /* TODO: lock for crypt_stat comparison */ + if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) + ecryptfs_set_default_sizes(crypt_stat); + + rc = ecryptfs_read_and_validate_header_region(inode); + ecryptfs_put_lower_file(inode); + if (rc) { + rc = ecryptfs_read_and_validate_xattr_region(dentry, inode); + if (!rc) + crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; + } + + /* Must return 0 to allow non-eCryptfs files to 
be looked up, too */ + return 0; +} + /** - * ecryptfs_lookup_and_interpose_lower - Perform a lookup + * ecryptfs_lookup_interpose - Dentry interposition for a lookup */ -int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry, - struct dentry *lower_dentry, - struct inode *ecryptfs_dir_inode) +static int ecryptfs_lookup_interpose(struct dentry *dentry, + struct dentry *lower_dentry, + struct inode *dir_inode) { - struct dentry *lower_dir_dentry; + struct inode *inode, *lower_inode = lower_dentry->d_inode; + struct ecryptfs_dentry_info *dentry_info; struct vfsmount *lower_mnt; - struct inode *lower_inode; - struct ecryptfs_crypt_stat *crypt_stat; - char *page_virt = NULL; - int put_lower = 0, rc = 0; - - lower_dir_dentry = lower_dentry->d_parent; - lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt( - ecryptfs_dentry->d_parent)); - lower_inode = lower_dentry->d_inode; - fsstack_copy_attr_atime(ecryptfs_dir_inode, lower_dir_dentry->d_inode); + int rc = 0; + + lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent)); + fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode); BUG_ON(!lower_dentry->d_count); - ecryptfs_set_dentry_private(ecryptfs_dentry, - kmem_cache_alloc(ecryptfs_dentry_info_cache, - GFP_KERNEL)); - if (!ecryptfs_dentry_to_private(ecryptfs_dentry)) { - rc = -ENOMEM; + + dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL); + ecryptfs_set_dentry_private(dentry, dentry_info); + if (!dentry_info) { printk(KERN_ERR "%s: Out of memory whilst attempting " "to allocate ecryptfs_dentry_info struct\n", __func__); - goto out_put; + dput(lower_dentry); + mntput(lower_mnt); + d_drop(dentry); + return -ENOMEM; } - ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry); - ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt); + ecryptfs_set_dentry_lower(dentry, lower_dentry); + ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt); + if (!lower_dentry->d_inode) { /* We want to add because we couldn't find in 
lower */ - d_add(ecryptfs_dentry, NULL); - goto out; - } - rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry, - ecryptfs_dir_inode->i_sb, - ECRYPTFS_INTERPOSE_FLAG_D_ADD); - if (rc) { - printk(KERN_ERR "%s: Error interposing; rc = [%d]\n", - __func__, rc); - goto out; - } - if (S_ISDIR(lower_inode->i_mode)) - goto out; - if (S_ISLNK(lower_inode->i_mode)) - goto out; - if (special_file(lower_inode->i_mode)) - goto out; - /* Released in this function */ - page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2, GFP_USER); - if (!page_virt) { - printk(KERN_ERR "%s: Cannot kmem_cache_zalloc() a page\n", - __func__); - rc = -ENOMEM; - goto out; + d_add(dentry, NULL); + return 0; } - rc = ecryptfs_get_lower_file(ecryptfs_dentry); - if (rc) { - printk(KERN_ERR "%s: Error attempting to initialize " - "the lower file for the dentry with name " - "[%s]; rc = [%d]\n", __func__, - ecryptfs_dentry->d_name.name, rc); - goto out_free_kmem; + inode = __ecryptfs_get_inode(lower_inode, dir_inode->i_sb); + if (IS_ERR(inode)) { + printk(KERN_ERR "%s: Error interposing; rc = [%ld]\n", + __func__, PTR_ERR(inode)); + return PTR_ERR(inode); } - put_lower = 1; - crypt_stat = &ecryptfs_inode_to_private( - ecryptfs_dentry->d_inode)->crypt_stat; - /* TODO: lock for crypt_stat comparison */ - if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) - ecryptfs_set_default_sizes(crypt_stat); - rc = ecryptfs_read_and_validate_header_region(page_virt, - ecryptfs_dentry->d_inode); - if (rc) { - memset(page_virt, 0, PAGE_CACHE_SIZE); - rc = ecryptfs_read_and_validate_xattr_region(page_virt, - ecryptfs_dentry); + if (S_ISREG(inode->i_mode)) { + rc = ecryptfs_i_size_read(dentry, inode); if (rc) { - rc = 0; - goto out_free_kmem; + make_bad_inode(inode); + return rc; } - crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR; } - ecryptfs_i_size_init(page_virt, ecryptfs_dentry->d_inode); -out_free_kmem: - kmem_cache_free(ecryptfs_header_cache_2, page_virt); - goto out; -out_put: - dput(lower_dentry); - 
mntput(lower_mnt); - d_drop(ecryptfs_dentry); -out: - if (put_lower) - ecryptfs_put_lower_file(ecryptfs_dentry->d_inode); + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + d_add(dentry, inode); + return rc; } @@ -353,12 +433,12 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, goto out_d_drop; } if (lower_dentry->d_inode) - goto lookup_and_interpose; + goto interpose; mount_crypt_stat = &ecryptfs_superblock_to_private( ecryptfs_dentry->d_sb)->mount_crypt_stat; if (!(mount_crypt_stat && (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES))) - goto lookup_and_interpose; + goto interpose; dput(lower_dentry); rc = ecryptfs_encrypt_and_encode_filename( &encrypted_and_encoded_name, &encrypted_and_encoded_name_size, @@ -381,9 +461,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, encrypted_and_encoded_name); goto out_d_drop; } -lookup_and_interpose: - rc = ecryptfs_lookup_and_interpose_lower(ecryptfs_dentry, lower_dentry, - ecryptfs_dir_inode); +interpose: + rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry, + ecryptfs_dir_inode); goto out; out_d_drop: d_drop(ecryptfs_dentry); @@ -411,7 +491,7 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir, lower_new_dentry); if (rc || !lower_new_dentry->d_inode) goto out_lock; - rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb, 0); + rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb); if (rc) goto out_lock; fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); @@ -478,7 +558,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, kfree(encoded_symname); if (rc || !lower_dentry->d_inode) goto out_lock; - rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); if (rc) goto out_lock; fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); @@ -502,7 +582,7 @@ static int ecryptfs_mkdir(struct inode *dir, struct dentry 
*dentry, int mode) rc = vfs_mkdir(lower_dir_dentry->d_inode, lower_dentry, mode); if (rc || !lower_dentry->d_inode) goto out; - rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); if (rc) goto out; fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); @@ -521,8 +601,6 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry) struct dentry *lower_dir_dentry; int rc; - dentry_unhash(dentry); - lower_dentry = ecryptfs_dentry_to_lower(dentry); dget(dentry); lower_dir_dentry = lock_parent(lower_dentry); @@ -552,7 +630,7 @@ ecryptfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) rc = vfs_mknod(lower_dir_dentry->d_inode, lower_dentry, mode, dev); if (rc || !lower_dentry->d_inode) goto out; - rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 0); + rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb); if (rc) goto out; fsstack_copy_attr_times(dir, lower_dir_dentry->d_inode); @@ -575,9 +653,6 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct dentry *lower_new_dir_dentry; struct dentry *trap = NULL; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry); lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry); dget(lower_old_dentry); @@ -755,7 +830,7 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia, lower_ia->ia_valid &= ~ATTR_SIZE; return 0; } - rc = ecryptfs_get_lower_file(dentry); + rc = ecryptfs_get_lower_file(dentry, inode); if (rc) return rc; crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; @@ -911,7 +986,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia) mount_crypt_stat = &ecryptfs_superblock_to_private( dentry->d_sb)->mount_crypt_stat; - rc = ecryptfs_get_lower_file(dentry); + rc = ecryptfs_get_lower_file(dentry, inode); if (rc) { 
mutex_unlock(&crypt_stat->cs_mutex); goto out; @@ -1084,21 +1159,6 @@ out: return rc; } -int ecryptfs_inode_test(struct inode *inode, void *candidate_lower_inode) -{ - if ((ecryptfs_inode_to_lower(inode) - == (struct inode *)candidate_lower_inode)) - return 1; - else - return 0; -} - -int ecryptfs_inode_set(struct inode *inode, void *lower_inode) -{ - ecryptfs_init_inode(inode, (struct inode *)lower_inode); - return 0; -} - const struct inode_operations ecryptfs_symlink_iops = { .readlink = ecryptfs_readlink, .follow_link = ecryptfs_follow_link, diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 89b93389af8..9f1bb747d77 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -135,12 +135,12 @@ static int ecryptfs_init_lower_file(struct dentry *dentry, return rc; } -int ecryptfs_get_lower_file(struct dentry *dentry) +int ecryptfs_get_lower_file(struct dentry *dentry, struct inode *inode) { - struct ecryptfs_inode_info *inode_info = - ecryptfs_inode_to_private(dentry->d_inode); + struct ecryptfs_inode_info *inode_info; int count, rc = 0; + inode_info = ecryptfs_inode_to_private(inode); mutex_lock(&inode_info->lower_file_mutex); count = atomic_inc_return(&inode_info->lower_file_count); if (WARN_ON_ONCE(count < 1)) @@ -168,75 +168,6 @@ void ecryptfs_put_lower_file(struct inode *inode) } } -static struct inode *ecryptfs_get_inode(struct inode *lower_inode, - struct super_block *sb) -{ - struct inode *inode; - int rc = 0; - - if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb)) { - rc = -EXDEV; - goto out; - } - if (!igrab(lower_inode)) { - rc = -ESTALE; - goto out; - } - inode = iget5_locked(sb, (unsigned long)lower_inode, - ecryptfs_inode_test, ecryptfs_inode_set, - lower_inode); - if (!inode) { - rc = -EACCES; - iput(lower_inode); - goto out; - } - if (inode->i_state & I_NEW) - unlock_new_inode(inode); - else - iput(lower_inode); - if (S_ISLNK(lower_inode->i_mode)) - inode->i_op = &ecryptfs_symlink_iops; - else if (S_ISDIR(lower_inode->i_mode)) - 
inode->i_op = &ecryptfs_dir_iops; - if (S_ISDIR(lower_inode->i_mode)) - inode->i_fop = &ecryptfs_dir_fops; - if (special_file(lower_inode->i_mode)) - init_special_inode(inode, lower_inode->i_mode, - lower_inode->i_rdev); - fsstack_copy_attr_all(inode, lower_inode); - /* This size will be overwritten for real files w/ headers and - * other metadata */ - fsstack_copy_inode_size(inode, lower_inode); - return inode; -out: - return ERR_PTR(rc); -} - -/** - * ecryptfs_interpose - * @lower_dentry: Existing dentry in the lower filesystem - * @dentry: ecryptfs' dentry - * @sb: ecryptfs's super_block - * @flags: flags to govern behavior of interpose procedure - * - * Interposes upper and lower dentries. - * - * Returns zero on success; non-zero otherwise - */ -int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, - struct super_block *sb, u32 flags) -{ - struct inode *lower_inode = lower_dentry->d_inode; - struct inode *inode = ecryptfs_get_inode(lower_inode, sb); - if (IS_ERR(inode)) - return PTR_ERR(inode); - if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD) - d_add(dentry, inode); - else - d_instantiate(dentry, inode); - return 0; -} - enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_cipher, ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes, @@ -704,13 +635,8 @@ static struct ecryptfs_cache_info { .size = sizeof(struct ecryptfs_sb_info), }, { - .cache = &ecryptfs_header_cache_1, - .name = "ecryptfs_headers_1", - .size = PAGE_CACHE_SIZE, - }, - { - .cache = &ecryptfs_header_cache_2, - .name = "ecryptfs_headers_2", + .cache = &ecryptfs_header_cache, + .name = "ecryptfs_headers", .size = PAGE_CACHE_SIZE, }, { diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index 245b517bf1b..dbd52d40df4 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c @@ -93,22 +93,6 @@ static void ecryptfs_destroy_inode(struct inode *inode) } /** - * ecryptfs_init_inode - * @inode: The ecryptfs inode - * - * Set up the ecryptfs inode. 
- */ -void ecryptfs_init_inode(struct inode *inode, struct inode *lower_inode) -{ - ecryptfs_set_inode_lower(inode, lower_inode); - inode->i_ino = lower_inode->i_ino; - inode->i_version++; - inode->i_op = &ecryptfs_main_iops; - inode->i_fop = &ecryptfs_main_fops; - inode->i_mapping->a_ops = &ecryptfs_aops; -} - -/** * ecryptfs_statfs * @sb: The ecryptfs super block * @buf: The struct kstatfs to fill in with stats diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 68b2e43d7c3..3451d23c3ba 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -3392,7 +3392,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) * so would cause a commit on atime updates, which we don't bother doing. * We handle synchronous inodes at the highest possible level. */ -void ext3_dirty_inode(struct inode *inode) +void ext3_dirty_inode(struct inode *inode, int flags) { handle_t *current_handle = ext3_journal_current_handle(); handle_t *handle; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a74b89c09f9..1921392cd70 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1813,7 +1813,7 @@ extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, extern void ext4_evict_inode(struct inode *); extern void ext4_clear_inode(struct inode *); extern int ext4_sync_inode(handle_t *, struct inode *); -extern void ext4_dirty_inode(struct inode *); +extern void ext4_dirty_inode(struct inode *, int); extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_can_truncate(struct inode *inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 50d0e9c6458..a5763e3505b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5733,7 +5733,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) * so would cause a commit on atime updates, which we don't bother doing. * We handle synchronous inodes at the highest possible level. 
*/ -void ext4_dirty_inode(struct inode *inode) +void ext4_dirty_inode(struct inode *inode, int flags) { handle_t *handle; diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index be15437c272..3b222dafd15 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -326,8 +326,6 @@ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) struct fat_slot_info sinfo; int err; - dentry_unhash(dentry); - lock_super(sb); /* * Check whether the directory is not in use, then check @@ -459,9 +457,6 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - err = fat_scan(old_dir, old_name, &old_sinfo); if (err) { err = -EIO; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index c61a6789f36..20b4ea53fdc 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -824,8 +824,6 @@ static int vfat_rmdir(struct inode *dir, struct dentry *dentry) struct fat_slot_info sinfo; int err; - dentry_unhash(dentry); - lock_super(sb); err = fat_dir_empty(inode); @@ -933,9 +931,6 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry, int err, is_dir, update_dotdot, corrupt = 0; struct super_block *sb = old_dir->i_sb; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 34591ee804b..0f015a0468d 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1007,9 +1007,6 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) * In short, make sure you hash any inodes _before_ you start marking * them dirty. * - * This function *must* be atomic for the I_DIRTY_PAGES case - - * set_page_dirty() is called under spinlock in several places. 
- * * Note that for blockdevs, inode->dirtied_when represents the dirtying time of * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of * the kernel-internal blockdev inode represents the dirtying time of the @@ -1028,7 +1025,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) */ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { if (sb->s_op->dirty_inode) - sb->s_op->dirty_inode(inode); + sb->s_op->dirty_inode(inode, flags); } /* diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 0d0e3faddcf..d5016071459 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -667,8 +667,6 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) if (IS_ERR(req)) return PTR_ERR(req); - dentry_unhash(entry); - req->in.h.opcode = FUSE_RMDIR; req->in.h.nodeid = get_node_id(dir); req->in.numargs = 1; @@ -694,9 +692,6 @@ static int fuse_rename(struct inode *olddir, struct dentry *oldent, struct fuse_conn *fc = get_fuse_conn(olddir); struct fuse_req *req = fuse_get_req(fc); - if (newent->d_inode && S_ISDIR(newent->d_inode->i_mode)) - dentry_unhash(newent); - if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 1cb70cdba2c..b4d70b13be9 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -253,9 +253,6 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int res; - if (S_ISDIR(inode->i_mode)) - dentry_unhash(dentry); - if (S_ISDIR(inode->i_mode) && inode->i_size != 2) return -ENOTEMPTY; res = hfs_cat_delete(inode->i_ino, dir, &dentry->d_name); @@ -286,9 +283,6 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* Unlink destination if it already exists */ if (new_dentry->d_inode) { - if (S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - res = hfs_remove(new_dir, new_dentry); if (res) return res; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index b28835091dd..4df5059c25d 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -370,8 
+370,6 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) struct inode *inode = dentry->d_inode; int res; - dentry_unhash(dentry); - if (inode->i_size != 2) return -ENOTEMPTY; @@ -469,12 +467,10 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, /* Unlink destination if it already exists */ if (new_dentry->d_inode) { - if (S_ISDIR(new_dentry->d_inode->i_mode)) { - dentry_unhash(new_dentry); + if (S_ISDIR(new_dentry->d_inode->i_mode)) res = hfsplus_rmdir(new_dir, new_dentry); - } else { + else res = hfsplus_unlink(new_dir, new_dentry); - } if (res) return res; } diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index e6816b9e690..2638c834ed2 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -683,8 +683,6 @@ int hostfs_rmdir(struct inode *ino, struct dentry *dentry) char *file; int err; - dentry_unhash(dentry); - if ((file = dentry_name(dentry)) == NULL) return -ENOMEM; err = do_rmdir(file); @@ -738,9 +736,6 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, char *from_name, *to_name; int err; - if (to->d_inode && S_ISDIR(to->d_inode->i_mode)) - dentry_unhash(to); - if ((from_name = dentry_name(from)) == NULL) return -ENOMEM; if ((to_name = dentry_name(to)) == NULL) { diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index ff0ce21c086..acf95dab2aa 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -439,8 +439,6 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) int err; int r; - dentry_unhash(dentry); - hpfs_adjust_length(name, &len); hpfs_lock(dir->i_sb); err = -ENOENT; @@ -535,9 +533,6 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct fnode *fnode; int err; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - if ((err = hpfs_chk_name(new_name, &new_len))) return err; err = 0; hpfs_adjust_length(old_name, &old_len); diff --git a/fs/inode.c b/fs/inode.c index 990d284877a..0f7e88a7803 100644 --- 
a/fs/inode.c +++ b/fs/inode.c @@ -1,9 +1,7 @@ /* - * linux/fs/inode.c - * * (C) 1997 Linus Torvalds + * (C) 1999 Andrea Arcangeli <andrea@suse.de> (dynamic inode allocation) */ - #include <linux/fs.h> #include <linux/mm.h> #include <linux/dcache.h> @@ -27,10 +25,11 @@ #include <linux/prefetch.h> #include <linux/ima.h> #include <linux/cred.h> +#include <linux/buffer_head.h> /* for inode_has_buffers */ #include "internal.h" /* - * inode locking rules. + * Inode locking rules: * * inode->i_lock protects: * inode->i_state, inode->i_hash, __iget() @@ -60,54 +59,11 @@ * inode_hash_lock */ -/* - * This is needed for the following functions: - * - inode_has_buffers - * - invalidate_bdev - * - * FIXME: remove all knowledge of the buffer layer from this file - */ -#include <linux/buffer_head.h> - -/* - * New inode.c implementation. - * - * This implementation has the basic premise of trying - * to be extremely low-overhead and SMP-safe, yet be - * simple enough to be "obviously correct". - * - * Famous last words. - */ - -/* inode dynamic allocation 1999, Andrea Arcangeli <andrea@suse.de> */ - -/* #define INODE_PARANOIA 1 */ -/* #define INODE_DEBUG 1 */ - -/* - * Inode lookup is no longer as critical as it used to be: - * most of the lookups are going to be through the dcache. - */ -#define I_HASHBITS i_hash_shift -#define I_HASHMASK i_hash_mask - static unsigned int i_hash_mask __read_mostly; static unsigned int i_hash_shift __read_mostly; static struct hlist_head *inode_hashtable __read_mostly; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); -/* - * Each inode can be on two separate lists. One is - * the hash list of the inode, used for lookups. The - * other linked list is the "type" list: - * "in_use" - valid inode, i_count > 0, i_nlink > 0 - * "dirty" - as "in_use" but also dirty - * "unused" - valid inode, i_count = 0 - * - * A "dirty" list is maintained for each super block, - * allowing for low-overhead inode sync() operations. 
- */ - static LIST_HEAD(inode_lru); static DEFINE_SPINLOCK(inode_lru_lock); @@ -424,8 +380,8 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval) tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / L1_CACHE_BYTES; - tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); - return tmp & I_HASHMASK; + tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> i_hash_shift); + return tmp & i_hash_mask; } /** diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 9a1e86fc136..4bca6a2e5c0 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -605,8 +605,6 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry) int ret; uint32_t now = get_seconds(); - dentry_unhash(dentry); - for (fd = f->dents ; fd; fd = fd->next) { if (fd->ino) return -ENOTEMPTY; @@ -782,9 +780,6 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, uint8_t type; uint32_t now; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - /* The VFS will check for us and prevent trying to rename a * file over a directory and vice versa, but if it's a directory, * the VFS can't check whether the victim is empty. 
The filesystem diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index e896e67767e..46ad619b612 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -357,7 +357,7 @@ error: return ERR_PTR(ret); } -void jffs2_dirty_inode(struct inode *inode) +void jffs2_dirty_inode(struct inode *inode, int flags) { struct iattr iattr; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 00bae7cc2e4..65c6c43ca48 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -172,7 +172,7 @@ int jffs2_setattr (struct dentry *, struct iattr *); int jffs2_do_setattr (struct inode *, struct iattr *); struct inode *jffs2_iget(struct super_block *, unsigned long); void jffs2_evict_inode (struct inode *); -void jffs2_dirty_inode(struct inode *inode); +void jffs2_dirty_inode(struct inode *inode, int flags); struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri); int jffs2_statfs (struct dentry *, struct kstatfs *); diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index eddbb373209..109655904bb 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -173,7 +173,7 @@ void jfs_evict_inode(struct inode *inode) dquot_drop(inode); } -void jfs_dirty_inode(struct inode *inode) +void jfs_dirty_inode(struct inode *inode, int flags) { static int noisy = 5; diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h index 155e91eff07..ec2fb8b945f 100644 --- a/fs/jfs/jfs_inode.h +++ b/fs/jfs/jfs_inode.h @@ -28,7 +28,7 @@ extern struct inode *jfs_iget(struct super_block *, unsigned long); extern int jfs_commit_inode(struct inode *, int); extern int jfs_write_inode(struct inode *, struct writeback_control *); extern void jfs_evict_inode(struct inode *); -extern void jfs_dirty_inode(struct inode *); +extern void jfs_dirty_inode(struct inode *, int); extern void jfs_truncate(struct inode *); extern void jfs_truncate_nolock(struct inode *, loff_t); extern void jfs_free_zero_link(struct inode *); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 865df16a6cf..eaaf2b511e8 100644 --- 
a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -360,8 +360,6 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry) jfs_info("jfs_rmdir: dip:0x%p name:%s", dip, dentry->d_name.name); - dentry_unhash(dentry); - /* Init inode for quota operations. */ dquot_initialize(dip); dquot_initialize(ip); @@ -1097,9 +1095,6 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry, jfs_info("jfs_rename: %s %s", old_dentry->d_name.name, new_dentry->d_name.name); - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - dquot_initialize(old_dir); dquot_initialize(new_dir); diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index f34c9cde9e9..9ed89d1663f 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -273,8 +273,6 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - dentry_unhash(dentry); - if (!logfs_empty_dir(inode)) return -ENOTEMPTY; @@ -624,9 +622,6 @@ static int logfs_rename_cross(struct inode *old_dir, struct dentry *old_dentry, loff_t pos; int err; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - /* 1. 
locate source dd */ err = logfs_get_dd(old_dir, old_dentry, &dd, &pos); if (err) diff --git a/fs/minix/namei.c b/fs/minix/namei.c index f60aed8db9c..6e6777f1b4b 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -168,8 +168,6 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err = -ENOTEMPTY; - dentry_unhash(dentry); - if (minix_empty_dir(inode)) { err = minix_unlink(dir, dentry); if (!err) { @@ -192,9 +190,6 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry, struct minix_dir_entry * old_de; int err = -ENOENT; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - old_de = minix_find_entry(old_dentry, &old_page); if (!old_de) goto out; diff --git a/fs/namei.c b/fs/namei.c index 2358b326b22..e2e4e8d032e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -919,12 +919,11 @@ static inline bool managed_dentry_might_block(struct dentry *dentry) } /* - * Skip to top of mountpoint pile in rcuwalk mode. We abort the rcu-walk if we - * meet a managed dentry and we're not walking to "..". True is returned to - * continue, false to abort. + * Try to skip to top of mountpoint pile in rcuwalk mode. Fail if + * we meet a managed dentry that would need blocking. */ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, - struct inode **inode, bool reverse_transit) + struct inode **inode) { for (;;) { struct vfsmount *mounted; @@ -933,8 +932,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, * that wants to block transit. 
*/ *inode = path->dentry->d_inode; - if (!reverse_transit && - unlikely(managed_dentry_might_block(path->dentry))) + if (unlikely(managed_dentry_might_block(path->dentry))) return false; if (!d_mountpoint(path->dentry)) @@ -947,16 +945,24 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, path->dentry = mounted->mnt_root; nd->seq = read_seqcount_begin(&path->dentry->d_seq); } - - if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) - return reverse_transit; return true; } -static int follow_dotdot_rcu(struct nameidata *nd) +static void follow_mount_rcu(struct nameidata *nd) { - struct inode *inode = nd->inode; + while (d_mountpoint(nd->path.dentry)) { + struct vfsmount *mounted; + mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1); + if (!mounted) + break; + nd->path.mnt = mounted; + nd->path.dentry = mounted->mnt_root; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); + } +} +static int follow_dotdot_rcu(struct nameidata *nd) +{ set_root_rcu(nd); while (1) { @@ -972,7 +978,6 @@ static int follow_dotdot_rcu(struct nameidata *nd) seq = read_seqcount_begin(&parent->d_seq); if (read_seqcount_retry(&old->d_seq, nd->seq)) goto failed; - inode = parent->d_inode; nd->path.dentry = parent; nd->seq = seq; break; @@ -980,10 +985,9 @@ static int follow_dotdot_rcu(struct nameidata *nd) if (!follow_up_rcu(&nd->path)) break; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); - inode = nd->path.dentry->d_inode; } - __follow_mount_rcu(nd, &nd->path, &inode, true); - nd->inode = inode; + follow_mount_rcu(nd); + nd->inode = nd->path.dentry->d_inode; return 0; failed: @@ -1157,8 +1161,11 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, } path->mnt = mnt; path->dentry = dentry; - if (likely(__follow_mount_rcu(nd, path, inode, false))) - return 0; + if (unlikely(!__follow_mount_rcu(nd, path, inode))) + goto unlazy; + if (unlikely(path->dentry->d_flags & DCACHE_NEED_AUTOMOUNT)) + goto unlazy; + return 0; unlazy: if 
(unlazy_walk(nd, dentry)) return -ECHILD; @@ -2572,6 +2579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (error) goto out; + shrink_dcache_parent(dentry); error = dir->i_op->rmdir(dir, dentry); if (error) goto out; @@ -2986,6 +2994,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) goto out; + if (target) + shrink_dcache_parent(new_dentry); error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (error) goto out; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index e3e646b0640..9c51f621e90 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -1033,8 +1033,11 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry) DPRINTK("ncp_rmdir: removing %s/%s\n", dentry->d_parent->d_name.name, dentry->d_name.name); + /* + * fail with EBUSY if there are still references to this + * directory. + */ dentry_unhash(dentry); - error = -EBUSY; if (!d_unhashed(dentry)) goto out; @@ -1141,8 +1144,16 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, old_dentry->d_parent->d_name.name, old_dentry->d_name.name, new_dentry->d_parent->d_name.name, new_dentry->d_name.name); - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) + if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) { + /* + * fail with EBUSY if there are still references to this + * directory. 
+ */ dentry_unhash(new_dentry); + error = -EBUSY; + if (!d_unhashed(new_dentry)) + goto out; + } ncp_age_dentry(server, old_dentry); ncp_age_dentry(server, new_dentry); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index ba306658a6d..81515545ba7 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -87,6 +87,16 @@ config NFS_V4_1 config PNFS_FILE_LAYOUT tristate +config PNFS_OBJLAYOUT + tristate "Provide support for the pNFS Objects Layout Driver for NFSv4.1 pNFS (EXPERIMENTAL)" + depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD + help + Say M here if you want your pNFS client to support the Objects Layout Driver. + Requires the SCSI osd initiator library (SCSI_OSD_INITIATOR) and + upper level driver (SCSI_OSD_ULD). + + If unsure, say N. + config ROOT_NFS bool "Root file system on NFS" depends on NFS_FS=y && IP_PNP diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 4776ff9e381..6a34f7dd0e6 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -15,9 +15,11 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o \ nfs4namespace.o -nfs-$(CONFIG_NFS_V4_1) += pnfs.o +nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o + +obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 46d93ce7311..b257383bb56 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -167,6 +167,23 @@ extern unsigned nfs4_callback_layoutrecall( extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); extern void nfs4_cb_take_slot(struct nfs_client *clp); + +struct cb_devicenotifyitem { + uint32_t cbd_notify_type; + uint32_t cbd_layout_type; + struct nfs4_deviceid cbd_dev_id; + uint32_t cbd_immediate; +}; + +struct cb_devicenotifyargs { + int ndevs; + struct 
cb_devicenotifyitem *devs; +}; + +extern __be32 nfs4_callback_devicenotify( + struct cb_devicenotifyargs *args, + void *dummy, struct cb_process_state *cps); + #endif /* CONFIG_NFS_V4_1 */ extern int check_gss_callback_principal(struct nfs_client *, struct svc_rqst *); extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args, diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 2f41dccea18..d4d1954e9bb 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -139,7 +139,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, spin_lock(&ino->i_lock); if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || mark_matching_lsegs_invalid(lo, &free_me_list, - args->cbl_range.iomode)) + &args->cbl_range)) rv = NFS4ERR_DELAY; else rv = NFS4ERR_NOMATCHING_LAYOUT; @@ -184,7 +184,7 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, ino = lo->plh_inode; spin_lock(&ino->i_lock); set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); - if (mark_matching_lsegs_invalid(lo, &free_me_list, range.iomode)) + if (mark_matching_lsegs_invalid(lo, &free_me_list, &range)) rv = NFS4ERR_DELAY; list_del_init(&lo->plh_bulk_recall); spin_unlock(&ino->i_lock); @@ -241,6 +241,59 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp) do_callback_layoutrecall(clp, &args); } +/* + * CB_NOTIFY_DEVICEID: delete every notified device ID from the cache of the + * layout driver matching its layout type, then free the decoded item array. + */ +__be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args, + void *dummy, struct cb_process_state *cps) +{ + int i; + __be32 res = 0; + struct nfs_client *clp = cps->clp; + struct nfs_server *server = NULL; + + dprintk("%s: -->\n", __func__); + + if (!clp) { + res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); + goto out; + } + + for (i = 0; i < args->ndevs; i++) { + struct cb_devicenotifyitem *dev = &args->devs[i]; + + if (!server || + server->pnfs_curr_ld->id != dev->cbd_layout_type) { + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) + if (server->pnfs_curr_ld && + server->pnfs_curr_ld->id == dev->cbd_layout_type) { + 
rcu_read_unlock(); + goto found; + } + rcu_read_unlock(); + dprintk("%s: layout type %u not found\n", + __func__, dev->cbd_layout_type); + /* a full walk leaves the cursor on the list head, not NULL */ + server = NULL; + continue; + } + + found: + if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) + dprintk("%s: NOTIFY_DEVICEID4_CHANGE not supported, " + "deleting instead\n", __func__); + nfs4_delete_deviceid(server->pnfs_curr_ld, clp, &dev->cbd_dev_id); + } + +out: + kfree(args->devs); + dprintk("%s: exit with status = %u\n", + __func__, be32_to_cpu(res)); + return res; +} + int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid) { if (delegation == NULL) diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 00ecf62ce7c..c6c86a77e04 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -25,6 +25,7 @@ #if defined(CONFIG_NFS_V4_1) #define CB_OP_LAYOUTRECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) +#define CB_OP_DEVICENOTIFY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) #define CB_OP_SEQUENCE_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ 4 + 1 + 3) #define CB_OP_RECALLANY_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) @@ -284,6 +285,93 @@ out: return status; } +static +__be32 decode_devicenotify_args(struct svc_rqst *rqstp, + struct xdr_stream *xdr, + struct cb_devicenotifyargs *args) +{ + __be32 *p; + __be32 status = 0; + u32 tmp; + int n, i; + args->ndevs = 0; + + /* Num of device notifications */ + p = read_buf(xdr, sizeof(uint32_t)); + if (unlikely(p == NULL)) { + status = htonl(NFS4ERR_BADXDR); + goto out; + } + n = ntohl(*p++); + if (n <= 0) + goto out; + + args->devs = kmalloc(n * sizeof(*args->devs), GFP_KERNEL); + if (!args->devs) { + status = htonl(NFS4ERR_DELAY); + goto out; + } + + /* Decode each dev notification */ + for (i = 0; i < n; i++) { + struct cb_devicenotifyitem *dev = &args->devs[i]; + + p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE); + if (unlikely(p == NULL)) { + status = htonl(NFS4ERR_BADXDR); + goto err; + } + + tmp = ntohl(*p++); /* bitmap size */ + if (tmp != 1) { + status = 
htonl(NFS4ERR_INVAL); + goto err; + } + dev->cbd_notify_type = ntohl(*p++); + if (dev->cbd_notify_type != NOTIFY_DEVICEID4_CHANGE && + dev->cbd_notify_type != NOTIFY_DEVICEID4_DELETE) { + status = htonl(NFS4ERR_INVAL); + goto err; + } + + tmp = ntohl(*p++); /* opaque size */ + if (((dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) && + (tmp != NFS4_DEVICEID4_SIZE + 8)) || + ((dev->cbd_notify_type == NOTIFY_DEVICEID4_DELETE) && + (tmp != NFS4_DEVICEID4_SIZE + 4))) { + status = htonl(NFS4ERR_INVAL); + goto err; + } + dev->cbd_layout_type = ntohl(*p++); + memcpy(dev->cbd_dev_id.data, p, NFS4_DEVICEID4_SIZE); + p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); + + if (dev->cbd_notify_type == NOTIFY_DEVICEID4_CHANGE) { /* only CHANGE is 4 bytes longer */ + p = read_buf(xdr, sizeof(uint32_t)); + if (unlikely(p == NULL)) { + status = htonl(NFS4ERR_BADXDR); + goto err; + } + dev->cbd_immediate = ntohl(*p++); + } else { + dev->cbd_immediate = 0; + } + + args->ndevs++; + + dprintk("%s: type %d layout 0x%x immediate %d\n", + __func__, dev->cbd_notify_type, dev->cbd_layout_type, + dev->cbd_immediate); + } +out: + dprintk("%s: status %d ndevs %d\n", + __func__, ntohl(status), args->ndevs); + return status; +err: + kfree(args->devs); + goto out; +} + static __be32 decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid) { @@ -639,10 +727,10 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) case OP_CB_RECALL_ANY: case OP_CB_RECALL_SLOT: case OP_CB_LAYOUTRECALL: + case OP_CB_NOTIFY_DEVICEID: *op = &callback_ops[op_nr]; break; - case OP_CB_NOTIFY_DEVICEID: case OP_CB_NOTIFY: case OP_CB_PUSH_DELEG: case OP_CB_RECALLABLE_OBJ_AVAIL: @@ -849,6 +937,12 @@ static struct callback_op callback_ops[] = { (callback_decode_arg_t)decode_layoutrecall_args, .res_maxsize = CB_OP_LAYOUTRECALL_RES_MAXSZ, }, + [OP_CB_NOTIFY_DEVICEID] = { + .process_op = (callback_process_op_t)nfs4_callback_devicenotify, + .decode_args = + (callback_decode_arg_t)decode_devicenotify_args, + .res_maxsize = 
CB_OP_DEVICENOTIFY_RES_MAXSZ, + }, [OP_CB_SEQUENCE] = { .process_op = (callback_process_op_t)nfs4_callback_sequence, .decode_args = (callback_decode_arg_t)decode_cb_sequence_args, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 139be9647d8..b3dc2b88b65 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -290,6 +290,8 @@ static void nfs_free_client(struct nfs_client *clp) if (clp->cl_machine_cred != NULL) put_rpccred(clp->cl_machine_cred); + nfs4_deviceid_purge_client(clp); + kfree(clp->cl_hostname); kfree(clp); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index bbbc6bf5cb2..dd25c2aec37 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -21,25 +21,13 @@ #include "delegation.h" #include "internal.h" -static void nfs_do_free_delegation(struct nfs_delegation *delegation) -{ - kfree(delegation); -} - -static void nfs_free_delegation_callback(struct rcu_head *head) -{ - struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu); - - nfs_do_free_delegation(delegation); -} - static void nfs_free_delegation(struct nfs_delegation *delegation) { if (delegation->cred) { put_rpccred(delegation->cred); delegation->cred = NULL; } - call_rcu(&delegation->rcu, nfs_free_delegation_callback); + kfree_rcu(delegation, rcu); } /** diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 424e47773a8..ededdbd0db3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -512,12 +512,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en struct page **xdr_pages, struct page *page, unsigned int buflen) { struct xdr_stream stream; - struct xdr_buf buf = { - .pages = xdr_pages, - .page_len = buflen, - .buflen = buflen, - .len = buflen, - }; + struct xdr_buf buf; struct page *scratch; struct nfs_cache_array *array; unsigned int count = 0; @@ -527,7 +522,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en if (scratch == NULL) return -ENOMEM; - xdr_init_decode(&stream, &buf, NULL); + 
xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); do { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 57bb31ad7a5..144f2a3c718 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1298,8 +1298,12 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) i_size_write(inode, new_isize); invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } - dprintk("NFS: isize change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); + dprintk("NFS: isize change on server for file %s/%ld " + "(%Ld to %Ld)\n", + inode->i_sb->s_id, + inode->i_ino, + (long long)cur_isize, + (long long)new_isize); } } else invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR @@ -1424,9 +1428,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ void nfs4_evict_inode(struct inode *inode) { - pnfs_destroy_layout(NFS_I(inode)); truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); + pnfs_return_layout(inode); + pnfs_destroy_layout(NFS_I(inode)); /* If we are holding a delegation, return it! 
*/ nfs_inode_return_delegation_noreclaim(inode); /* First call standard NFS clear_inode() code */ diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2df6ca7b589..b9056cbe68d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -310,6 +310,7 @@ extern int nfs_migrate_page(struct address_space *, #endif /* nfs4proc.c */ +extern void __nfs4_read_done_cb(struct nfs_read_data *); extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data); extern int nfs4_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms, diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index be79dc9f386..426908809c9 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -421,6 +421,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_deviceid *id, gfp_t gfp_flags) { + struct nfs4_deviceid_node *d; struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; struct nfs_server *nfss = NFS_SERVER(lo->plh_inode); @@ -428,7 +429,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); if (fl->pattern_offset > lgr->range.offset) { - dprintk("%s pattern_offset %lld to large\n", + dprintk("%s pattern_offset %lld too large\n", __func__, fl->pattern_offset); goto out; } @@ -440,12 +441,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, } /* find and reference the deviceid */ - dsaddr = nfs4_fl_find_get_deviceid(id); - if (dsaddr == NULL) { + d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, + NFS_SERVER(lo->plh_inode)->nfs_client, id); + if (d == NULL) { dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); if (dsaddr == NULL) goto out; - } + } else + dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); fl->dsaddr = dsaddr; if (fl->first_stripe_index < 0 || @@ -507,12 +510,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, gfp_t gfp_flags) { struct xdr_stream stream; - struct xdr_buf buf = { - .pages = lgr->layoutp->pages, - .page_len = 
lgr->layoutp->len, - .buflen = lgr->layoutp->len, - .len = lgr->layoutp->len, - }; + struct xdr_buf buf; struct page *scratch; __be32 *p; uint32_t nfl_util; @@ -524,7 +522,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, if (!scratch) return -ENOMEM; - xdr_init_decode(&stream, &buf, NULL); + xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len); xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8), @@ -535,7 +533,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, memcpy(id, p, sizeof(*id)); p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); - print_deviceid(id); + nfs4_print_deviceid(id); nfl_util = be32_to_cpup(p++); if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) @@ -653,16 +651,19 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * - * return 1 : coalesce page - * return 0 : don't coalesce page + * return true : coalesce page + * return false : don't coalesce page */ -int +bool filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { u64 p_stripe, r_stripe; u32 stripe_unit; + if (!pnfs_generic_pg_test(pgio, prev, req)) + return 0; + if (!pgio->pg_lseg) return 1; p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; @@ -860,6 +861,12 @@ filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, return -ENOMEM; } +static void +filelayout_free_deveiceid_node(struct nfs4_deviceid_node *d) +{ + nfs4_fl_free_deviceid(container_of(d, struct nfs4_file_layout_dsaddr, id_node)); +} + static struct pnfs_layoutdriver_type filelayout_type = { .id = LAYOUT_NFSV4_1_FILES, .name = "LAYOUT_NFSV4_1_FILES", @@ -872,6 +879,7 @@ static struct pnfs_layoutdriver_type filelayout_type = { .commit_pagelist = filelayout_commit_pagelist, .read_pagelist = filelayout_read_pagelist, .write_pagelist = filelayout_write_pagelist, + .free_deviceid_node = 
filelayout_free_deveiceid_node, }; static int __init nfs4filelayout_init(void) diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 2b461d77b43..cebe01e3795 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -59,9 +59,7 @@ struct nfs4_pnfs_ds { #define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001 struct nfs4_file_layout_dsaddr { - struct hlist_node node; - struct nfs4_deviceid deviceid; - atomic_t ref; + struct nfs4_deviceid_node id_node; unsigned long flags; u32 stripe_count; u8 *stripe_indices; @@ -95,14 +93,12 @@ extern struct nfs_fh * nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); extern void print_ds(struct nfs4_pnfs_ds *ds); -extern void print_deviceid(struct nfs4_deviceid *dev_id); u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset); u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j); struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx); -extern struct nfs4_file_layout_dsaddr * -nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id); extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); +extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index db07c7af139..3b7bf137726 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -37,30 +37,6 @@ #define NFSDBG_FACILITY NFSDBG_PNFS_LD /* - * Device ID RCU cache. A device ID is unique per client ID and layout type. 
- */ -#define NFS4_FL_DEVICE_ID_HASH_BITS 5 -#define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS) -#define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1) - -static inline u32 -nfs4_fl_deviceid_hash(struct nfs4_deviceid *id) -{ - unsigned char *cptr = (unsigned char *)id->data; - unsigned int nbytes = NFS4_DEVICEID4_SIZE; - u32 x = 0; - - while (nbytes--) { - x *= 37; - x += *cptr++; - } - return x & NFS4_FL_DEVICE_ID_HASH_MASK; -} - -static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE]; -static DEFINE_SPINLOCK(filelayout_deviceid_lock); - -/* * Data server cache * * Data servers can be mapped to different device ids. @@ -89,27 +65,6 @@ print_ds(struct nfs4_pnfs_ds *ds) ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); } -void -print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr) -{ - int i; - - ifdebug(FACILITY) { - printk("%s dsaddr->ds_num %d\n", __func__, - dsaddr->ds_num); - for (i = 0; i < dsaddr->ds_num; i++) - print_ds(dsaddr->ds_list[i]); - } -} - -void print_deviceid(struct nfs4_deviceid *id) -{ - u32 *p = (u32 *)id; - - dprintk("%s: device id= [%x%x%x%x]\n", __func__, - p[0], p[1], p[2], p[3]); -} - /* nfs4_ds_cache_lock is held */ static struct nfs4_pnfs_ds * _data_server_lookup_locked(u32 ip_addr, u32 port) @@ -201,13 +156,13 @@ destroy_ds(struct nfs4_pnfs_ds *ds) kfree(ds); } -static void +void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) { struct nfs4_pnfs_ds *ds; int i; - print_deviceid(&dsaddr->deviceid); + nfs4_print_deviceid(&dsaddr->id_node.deviceid); for (i = 0; i < dsaddr->ds_num; i++) { ds = dsaddr->ds_list[i]; @@ -353,12 +308,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) u8 max_stripe_index; struct nfs4_file_layout_dsaddr *dsaddr = NULL; struct xdr_stream stream; - struct xdr_buf buf = { - .pages = pdev->pages, - .page_len = pdev->pglen, - .buflen = pdev->pglen, - .len = pdev->pglen, - }; + struct xdr_buf buf; struct page *scratch; 
/* set up xdr stream */ @@ -366,7 +316,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) if (!scratch) goto out_err; - xdr_init_decode(&stream, &buf, NULL); + xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); /* Get the stripe count (number of stripe index) */ @@ -431,8 +381,10 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) dsaddr->stripe_indices = stripe_indices; stripe_indices = NULL; dsaddr->ds_num = num; - - memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id)); + nfs4_init_deviceid_node(&dsaddr->id_node, + NFS_SERVER(ino)->pnfs_curr_ld, + NFS_SERVER(ino)->nfs_client, + &pdev->dev_id); for (i = 0; i < dsaddr->ds_num; i++) { int j; @@ -505,8 +457,8 @@ out_err: static struct nfs4_file_layout_dsaddr * decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) { - struct nfs4_file_layout_dsaddr *d, *new; - long hash; + struct nfs4_deviceid_node *d; + struct nfs4_file_layout_dsaddr *n, *new; new = decode_device(inode, dev, gfp_flags); if (!new) { @@ -515,20 +467,13 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl return NULL; } - spin_lock(&filelayout_deviceid_lock); - d = nfs4_fl_find_get_deviceid(&new->deviceid); - if (d) { - spin_unlock(&filelayout_deviceid_lock); + d = nfs4_insert_deviceid_node(&new->id_node); + n = container_of(d, struct nfs4_file_layout_dsaddr, id_node); + if (n != new) { nfs4_fl_free_deviceid(new); - return d; + return n; } - INIT_HLIST_NODE(&new->node); - atomic_set(&new->ref, 1); - hash = nfs4_fl_deviceid_hash(&new->deviceid); - hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]); - spin_unlock(&filelayout_deviceid_lock); - return new; } @@ -600,35 +545,7 @@ out_free: void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) { - if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) { - 
hlist_del_rcu(&dsaddr->node); - spin_unlock(&filelayout_deviceid_lock); - - synchronize_rcu(); - nfs4_fl_free_deviceid(dsaddr); - } -} - -struct nfs4_file_layout_dsaddr * -nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id) -{ - struct nfs4_file_layout_dsaddr *d; - struct hlist_node *n; - long hash = nfs4_fl_deviceid_hash(id); - - - rcu_read_lock(); - hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) { - if (!memcmp(&d->deviceid, id, sizeof(*id))) { - if (!atomic_inc_not_zero(&d->ref)) - goto fail; - rcu_read_unlock(); - return d; - } - } -fail: - rcu_read_unlock(); - return NULL; + nfs4_put_deviceid_node(&dsaddr->id_node); } /* @@ -676,15 +593,15 @@ static void filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, int err, u32 ds_addr) { - u32 *p = (u32 *)&dsaddr->deviceid; + u32 *p = (u32 *)&dsaddr->id_node.deviceid; printk(KERN_ERR "NFS: data server %x connection error %d." " Deviceid [%x%x%x%x] marked out of use.\n", ds_addr, err, p[0], p[1], p[2], p[3]); - spin_lock(&filelayout_deviceid_lock); + spin_lock(&nfs4_ds_cache_lock); dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; - spin_unlock(&filelayout_deviceid_lock); + spin_unlock(&nfs4_ds_cache_lock); } struct nfs4_pnfs_ds * diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index cf1b339c393..d2c4b59c896 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -267,9 +267,11 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc break; nfs4_schedule_stateid_recovery(server, state); goto wait_on_recovery; + case -NFS4ERR_EXPIRED: + if (state != NULL) + nfs4_schedule_stateid_recovery(server, state); case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_EXPIRED: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) @@ -2361,6 +2363,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, struct nfs4_state *state = NULL; int status; + if (pnfs_ld_layoutret_on_setattr(inode)) + 
pnfs_return_layout(inode); + nfs_fattr_init(fattr); /* Search for an existing open(O_WRITE) file */ @@ -3175,6 +3180,11 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, return err; } +void __nfs4_read_done_cb(struct nfs_read_data *data) +{ + nfs_invalidate_atime(data->inode); +} + static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) { struct nfs_server *server = NFS_SERVER(data->inode); @@ -3184,7 +3194,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data) return -EAGAIN; } - nfs_invalidate_atime(data->inode); + __nfs4_read_done_cb(data); if (task->tk_status > 0) renew_lease(server, data->timestamp); return 0; @@ -3198,7 +3208,8 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; - return data->read_done_cb(task, data); + return data->read_done_cb ? data->read_done_cb(task, data) : + nfs4_read_done_cb(task, data); } static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) @@ -3243,7 +3254,8 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (!nfs4_sequence_done(task, &data->res.seq_res)) return -EAGAIN; - return data->write_done_cb(task, data); + return data->write_done_cb ? data->write_done_cb(task, data) : + nfs4_write_done_cb(task, data); } /* Reset the the nfs_write_data to send the write to the MDS. 
*/ @@ -3670,9 +3682,11 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, break; nfs4_schedule_stateid_recovery(server, state); goto wait_on_recovery; + case -NFS4ERR_EXPIRED: + if (state != NULL) + nfs4_schedule_stateid_recovery(server, state); case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_EXPIRED: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) @@ -4543,6 +4557,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) case -ESTALE: goto out; case -NFS4ERR_EXPIRED: + nfs4_schedule_stateid_recovery(server, state); case -NFS4ERR_STALE_CLIENTID: case -NFS4ERR_STALE_STATEID: nfs4_schedule_lease_recovery(server->nfs_client); @@ -5666,6 +5681,88 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) return status; } +static void +nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutreturn *lrp = calldata; + + dprintk("--> %s\n", __func__); + if (nfs41_setup_sequence(lrp->clp->cl_session, &lrp->args.seq_args, + &lrp->res.seq_res, 0, task)) + return; + rpc_call_start(task); +} + +static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) +{ + struct nfs4_layoutreturn *lrp = calldata; + struct nfs_server *server; + + dprintk("--> %s\n", __func__); + + if (!nfs4_sequence_done(task, &lrp->res.seq_res)) + return; + + server = NFS_SERVER(lrp->args.inode); + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { + nfs_restart_rpc(task, lrp->clp); + return; + } + if (task->tk_status == 0) { + struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; + + if (lrp->res.lrs_present) { + spin_lock(&lo->plh_inode->i_lock); + pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); + spin_unlock(&lo->plh_inode->i_lock); + } else + BUG_ON(!list_empty(&lo->plh_segs)); + } + dprintk("<-- %s\n", __func__); +} + +static void nfs4_layoutreturn_release(void *calldata) +{ + struct nfs4_layoutreturn *lrp = 
calldata; + + dprintk("--> %s\n", __func__); + put_layout_hdr(NFS_I(lrp->args.inode)->layout); + kfree(calldata); + dprintk("<-- %s\n", __func__); +} + +static const struct rpc_call_ops nfs4_layoutreturn_call_ops = { + .rpc_call_prepare = nfs4_layoutreturn_prepare, + .rpc_call_done = nfs4_layoutreturn_done, + .rpc_release = nfs4_layoutreturn_release, +}; + +int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) +{ + struct rpc_task *task; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTRETURN], + .rpc_argp = &lrp->args, + .rpc_resp = &lrp->res, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = lrp->clp->cl_rpcclient, + .rpc_message = &msg, + .callback_ops = &nfs4_layoutreturn_call_ops, + .callback_data = lrp, + }; + int status; + + dprintk("--> %s\n", __func__); + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) + return PTR_ERR(task); + status = task->tk_status; + dprintk("<-- %s status=%d\n", __func__, status); + rpc_put_task(task); + return status; +} + static int _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) { diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 036f5adc9e1..e97dd219f84 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1466,7 +1466,10 @@ static int nfs4_reclaim_lease(struct nfs_client *clp) #ifdef CONFIG_NFS_V4_1 void nfs4_schedule_session_recovery(struct nfs4_session *session) { - nfs4_schedule_lease_recovery(session->clp); + struct nfs_client *clp = session->clp; + + set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); + nfs4_schedule_lease_recovery(clp); } EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery); @@ -1549,6 +1552,7 @@ static int nfs4_reset_session(struct nfs_client *clp) status = nfs4_recovery_handle_error(clp, status); goto out; } + clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); /* create_session negotiated new slot table */ clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c 
index c3ccd2c4683..d869a5e5464 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -338,7 +338,11 @@ static int nfs4_stat_to_errno(int); 1 /* layoutupdate4 layout type */ + \ 1 /* NULL filelayout layoutupdate4 payload */) #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) - +#define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ + encode_stateid_maxsz + \ + 1 /* FIXME: opaque lrf_body always empty at the moment */) +#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ + 1 + decode_stateid_maxsz) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -760,7 +764,14 @@ static int nfs4_stat_to_errno(int); decode_putfh_maxsz + \ decode_layoutcommit_maxsz + \ decode_getattr_maxsz) - +#define NFS4_enc_layoutreturn_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_layoutreturn_maxsz) +#define NFS4_dec_layoutreturn_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_layoutreturn_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1864,6 +1875,7 @@ encode_layoutget(struct xdr_stream *xdr, static int encode_layoutcommit(struct xdr_stream *xdr, + struct inode *inode, const struct nfs4_layoutcommit_args *args, struct compound_hdr *hdr) { @@ -1872,7 +1884,7 @@ encode_layoutcommit(struct xdr_stream *xdr, dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten, NFS_SERVER(args->inode)->pnfs_curr_ld->id); - p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE); + p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); *p++ = cpu_to_be32(OP_LAYOUTCOMMIT); /* Only whole file layouts */ p = xdr_encode_hyper(p, 0); /* offset */ @@ -1883,12 +1895,49 @@ encode_layoutcommit(struct xdr_stream *xdr, p = xdr_encode_hyper(p, args->lastbytewritten); *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ - *p++ = 
cpu_to_be32(0); /* no file layout payload */ + + if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) + NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( + NFS_I(inode)->layout, xdr, args); + else { + p = reserve_space(xdr, 4); + *p = cpu_to_be32(0); /* no layout-type payload */ + } hdr->nops++; hdr->replen += decode_layoutcommit_maxsz; return 0; } + +static void +encode_layoutreturn(struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 20); + *p++ = cpu_to_be32(OP_LAYOUTRETURN); + *p++ = cpu_to_be32(0); /* reclaim. always 0 for now */ + *p++ = cpu_to_be32(args->layout_type); + *p++ = cpu_to_be32(IOMODE_ANY); + *p = cpu_to_be32(RETURN_FILE); + p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE); + p = xdr_encode_hyper(p, 0); + p = xdr_encode_hyper(p, NFS4_MAX_UINT64); + spin_lock(&args->inode->i_lock); + xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE); + spin_unlock(&args->inode->i_lock); + if (NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn) { + NFS_SERVER(args->inode)->pnfs_curr_ld->encode_layoutreturn( + NFS_I(args->inode)->layout, xdr, args); + } else { + p = reserve_space(xdr, 4); + *p = cpu_to_be32(0); + } + hdr->nops++; + hdr->replen += decode_layoutreturn_maxsz; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2706,10 +2755,12 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, /* * Encode LAYOUTCOMMIT request */ -static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, - struct xdr_stream *xdr, - struct nfs4_layoutcommit_args *args) +static void nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_layoutcommit_args *args) { + struct nfs4_layoutcommit_data *data = + container_of(args, struct nfs4_layoutcommit_data, args); struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; @@ -2717,10 +2768,27 @@ static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req, 
encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, NFS_FH(args->inode), &hdr); - encode_layoutcommit(xdr, args, &hdr); + encode_layoutcommit(xdr, data->args.inode, args, &hdr); encode_getfattr(xdr, args->bitmask, &hdr); encode_nops(&hdr); - return 0; +} + +/* + * Encode LAYOUTRETURN request + */ +static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_layoutreturn_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, NFS_FH(args->inode), &hdr); + encode_layoutreturn(xdr, args, &hdr); + encode_nops(&hdr); } #endif /* CONFIG_NFS_V4_1 */ @@ -5203,6 +5271,27 @@ out_overflow: return -EIO; } +static int decode_layoutreturn(struct xdr_stream *xdr, + struct nfs4_layoutreturn_res *res) +{ + __be32 *p; + int status; + + status = decode_op_hdr(xdr, OP_LAYOUTRETURN); + if (status) + return status; + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->lrs_present = be32_to_cpup(p); + if (res->lrs_present) + status = decode_stateid(xdr, &res->stateid); + return status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} + static int decode_layoutcommit(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_layoutcommit_res *res) @@ -6320,6 +6409,30 @@ out: } /* + * Decode LAYOUTRETURN response + */ +static int nfs4_xdr_dec_layoutreturn(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_layoutreturn_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_layoutreturn(xdr, res); +out: + return status; +} + +/* * Decode LAYOUTCOMMIT response */ static 
int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, @@ -6547,6 +6660,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), PROC(LAYOUTGET, enc_layoutget, dec_layoutget), PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), + PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index c541093a5bf..c4744e1d513 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -87,7 +87,7 @@ #define NFS_ROOT "/tftpboot/%s" /* Default NFSROOT mount options. */ -#define NFS_DEF_OPTIONS "udp" +#define NFS_DEF_OPTIONS "vers=2,udp,rsize=4096,wsize=4096" /* Parameters passed from the kernel command line */ static char nfs_root_parms[256] __initdata = ""; diff --git a/fs/nfs/objlayout/Kbuild b/fs/nfs/objlayout/Kbuild new file mode 100644 index 00000000000..ed30ea072bb --- /dev/null +++ b/fs/nfs/objlayout/Kbuild @@ -0,0 +1,5 @@ +# +# Makefile for the pNFS Objects Layout Driver kernel module +# +objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o +obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c new file mode 100644 index 00000000000..9cf208df1f2 --- /dev/null +++ b/fs/nfs/objlayout/objio_osd.c @@ -0,0 +1,1057 @@ +/* + * pNFS Objects layout implementation over open-osd initiator library + * + * Copyright (C) 2009 Panasas Inc. [year of first publication] + * All rights reserved. + * + * Benny Halevy <bhalevy@panasas.com> + * Boaz Harrosh <bharrosh@panasas.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/module.h> +#include <scsi/osd_initiator.h> + +#include "objlayout.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +#define _LLU(x) ((unsigned long long)x) + +enum { BIO_MAX_PAGES_KMALLOC = + (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec), +}; + +struct objio_dev_ent { + struct nfs4_deviceid_node id_node; + struct osd_dev *od; +}; + +static void +objio_free_deviceid_node(struct nfs4_deviceid_node *d) +{ + struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node); + + dprintk("%s: free od=%p\n", __func__, de->od); + osduld_put_device(de->od); + kfree(de); +} + +static struct objio_dev_ent *_dev_list_find(const struct nfs_server *nfss, + const struct nfs4_deviceid *d_id) +{ + struct nfs4_deviceid_node *d; + struct objio_dev_ent *de; + + d = nfs4_find_get_deviceid(nfss->pnfs_curr_ld, nfss->nfs_client, d_id); + if (!d) + return NULL; + + de = container_of(d, struct objio_dev_ent, id_node); + return de; +} + +static struct objio_dev_ent * +_dev_list_add(const struct nfs_server *nfss, + const struct nfs4_deviceid *d_id, struct osd_dev *od, + gfp_t gfp_flags) +{ + struct nfs4_deviceid_node *d; + struct objio_dev_ent *de = kzalloc(sizeof(*de), gfp_flags); + struct objio_dev_ent *n; + + if (!de) { + dprintk("%s: -ENOMEM od=%p\n", __func__, od); + return NULL; + } + + dprintk("%s: Adding od=%p\n", __func__, od); + nfs4_init_deviceid_node(&de->id_node, + nfss->pnfs_curr_ld, + nfss->nfs_client, + d_id); + de->od = od; + + d = nfs4_insert_deviceid_node(&de->id_node); + n = container_of(d, struct objio_dev_ent, id_node); + if (n != de) { + dprintk("%s: Race with other n->od=%p\n", __func__, n->od); + objio_free_deviceid_node(&de->id_node); + de = n; + } + + atomic_inc(&de->id_node.ref); + return de; +} + +struct caps_buffers { + u8 caps_key[OSD_CRYPTO_KEYID_SIZE]; + u8 creds[OSD_CAP_LEN]; +}; + +struct objio_segment { + struct pnfs_layout_segment lseg; + + struct pnfs_osd_object_cred *comps; + + unsigned mirrors_p1; + unsigned 
stripe_unit; + unsigned group_width; /* Data stripe_units without integrity comps */ + u64 group_depth; + unsigned group_count; + + unsigned max_io_size; + + unsigned comps_index; + unsigned num_comps; + /* variable length */ + struct objio_dev_ent *ods[]; +}; + +static inline struct objio_segment * +OBJIO_LSEG(struct pnfs_layout_segment *lseg) +{ + return container_of(lseg, struct objio_segment, lseg); +} + +struct objio_state; +typedef ssize_t (*objio_done_fn)(struct objio_state *ios); + +struct objio_state { + /* Generic layer */ + struct objlayout_io_state ol_state; + + struct objio_segment *layout; + + struct kref kref; + objio_done_fn done; + void *private; + + unsigned long length; + unsigned numdevs; /* Actually used devs in this IO */ + /* A per-device variable array of size numdevs */ + struct _objio_per_comp { + struct bio *bio; + struct osd_request *or; + unsigned long length; + u64 offset; + unsigned dev; + } per_dev[]; +}; + +/* Send and wait for a get_device_info of devices in the layout, + then look them up with the osd_initiator library */ +static struct objio_dev_ent *_device_lookup(struct pnfs_layout_hdr *pnfslay, + struct objio_segment *objio_seg, unsigned comp, + gfp_t gfp_flags) +{ + struct pnfs_osd_deviceaddr *deviceaddr; + struct nfs4_deviceid *d_id; + struct objio_dev_ent *ode; + struct osd_dev *od; + struct osd_dev_info odi; + int err; + + d_id = &objio_seg->comps[comp].oc_object_id.oid_device_id; + + ode = _dev_list_find(NFS_SERVER(pnfslay->plh_inode), d_id); + if (ode) + return ode; + + err = objlayout_get_deviceinfo(pnfslay, d_id, &deviceaddr, gfp_flags); + if (unlikely(err)) { + dprintk("%s: objlayout_get_deviceinfo dev(%llx:%llx) =>%d\n", + __func__, _DEVID_LO(d_id), _DEVID_HI(d_id), err); + return ERR_PTR(err); + } + + odi.systemid_len = deviceaddr->oda_systemid.len; + if (odi.systemid_len > sizeof(odi.systemid)) { + err = -EINVAL; + goto out; + } else if (odi.systemid_len) + memcpy(odi.systemid, deviceaddr->oda_systemid.data, + 
odi.systemid_len); + odi.osdname_len = deviceaddr->oda_osdname.len; + odi.osdname = (u8 *)deviceaddr->oda_osdname.data; + + if (!odi.osdname_len && !odi.systemid_len) { + dprintk("%s: !odi.osdname_len && !odi.systemid_len\n", + __func__); + err = -ENODEV; + goto out; + } + + od = osduld_info_lookup(&odi); + if (unlikely(IS_ERR(od))) { + err = PTR_ERR(od); + dprintk("%s: osduld_info_lookup => %d\n", __func__, err); + goto out; + } + + ode = _dev_list_add(NFS_SERVER(pnfslay->plh_inode), d_id, od, + gfp_flags); + +out: + dprintk("%s: return=%d\n", __func__, err); + objlayout_put_deviceinfo(deviceaddr); + return err ? ERR_PTR(err) : ode; +} + +static int objio_devices_lookup(struct pnfs_layout_hdr *pnfslay, + struct objio_segment *objio_seg, + gfp_t gfp_flags) +{ + unsigned i; + int err; + + /* lookup all devices */ + for (i = 0; i < objio_seg->num_comps; i++) { + struct objio_dev_ent *ode; + + ode = _device_lookup(pnfslay, objio_seg, i, gfp_flags); + if (unlikely(IS_ERR(ode))) { + err = PTR_ERR(ode); + goto out; + } + objio_seg->ods[i] = ode; + } + err = 0; + +out: + dprintk("%s: return=%d\n", __func__, err); + return err; +} + +static int _verify_data_map(struct pnfs_osd_layout *layout) +{ + struct pnfs_osd_data_map *data_map = &layout->olo_map; + u64 stripe_length; + u32 group_width; + +/* FIXME: Only raid0 for now. 
if not go through MDS */ + if (data_map->odm_raid_algorithm != PNFS_OSD_RAID_0) { + printk(KERN_ERR "Only RAID_0 for now\n"); + return -ENOTSUPP; + } + if (0 != (data_map->odm_num_comps % (data_map->odm_mirror_cnt + 1))) { + printk(KERN_ERR "Data Map wrong, num_comps=%u mirrors=%u\n", + data_map->odm_num_comps, data_map->odm_mirror_cnt); + return -EINVAL; + } + + if (data_map->odm_group_width) + group_width = data_map->odm_group_width; + else + group_width = data_map->odm_num_comps / + (data_map->odm_mirror_cnt + 1); + + stripe_length = (u64)data_map->odm_stripe_unit * group_width; + if (stripe_length >= (1ULL << 32)) { + printk(KERN_ERR "Total Stripe length(0x%llx)" + " >= 32bit is not supported\n", _LLU(stripe_length)); + return -ENOTSUPP; + } + + if (0 != (data_map->odm_stripe_unit & ~PAGE_MASK)) { + printk(KERN_ERR "Stripe Unit(0x%llx)" + " must be Multples of PAGE_SIZE(0x%lx)\n", + _LLU(data_map->odm_stripe_unit), PAGE_SIZE); + return -ENOTSUPP; + } + + return 0; +} + +static void copy_single_comp(struct pnfs_osd_object_cred *cur_comp, + struct pnfs_osd_object_cred *src_comp, + struct caps_buffers *caps_p) +{ + WARN_ON(src_comp->oc_cap_key.cred_len > sizeof(caps_p->caps_key)); + WARN_ON(src_comp->oc_cap.cred_len > sizeof(caps_p->creds)); + + *cur_comp = *src_comp; + + memcpy(caps_p->caps_key, src_comp->oc_cap_key.cred, + sizeof(caps_p->caps_key)); + cur_comp->oc_cap_key.cred = caps_p->caps_key; + + memcpy(caps_p->creds, src_comp->oc_cap.cred, + sizeof(caps_p->creds)); + cur_comp->oc_cap.cred = caps_p->creds; +} + +int objio_alloc_lseg(struct pnfs_layout_segment **outp, + struct pnfs_layout_hdr *pnfslay, + struct pnfs_layout_range *range, + struct xdr_stream *xdr, + gfp_t gfp_flags) +{ + struct objio_segment *objio_seg; + struct pnfs_osd_xdr_decode_layout_iter iter; + struct pnfs_osd_layout layout; + struct pnfs_osd_object_cred *cur_comp, src_comp; + struct caps_buffers *caps_p; + int err; + + err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); + if 
(unlikely(err)) + return err; + + err = _verify_data_map(&layout); + if (unlikely(err)) + return err; + + objio_seg = kzalloc(sizeof(*objio_seg) + + sizeof(objio_seg->ods[0]) * layout.olo_num_comps + + sizeof(*objio_seg->comps) * layout.olo_num_comps + + sizeof(struct caps_buffers) * layout.olo_num_comps, + gfp_flags); + if (!objio_seg) + return -ENOMEM; + + objio_seg->comps = (void *)(objio_seg->ods + layout.olo_num_comps); + cur_comp = objio_seg->comps; + caps_p = (void *)(cur_comp + layout.olo_num_comps); + while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) + copy_single_comp(cur_comp++, &src_comp, caps_p++); + if (unlikely(err)) + goto err; + + objio_seg->num_comps = layout.olo_num_comps; + objio_seg->comps_index = layout.olo_comps_index; + err = objio_devices_lookup(pnfslay, objio_seg, gfp_flags); + if (err) + goto err; + + objio_seg->mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1; + objio_seg->stripe_unit = layout.olo_map.odm_stripe_unit; + if (layout.olo_map.odm_group_width) { + objio_seg->group_width = layout.olo_map.odm_group_width; + objio_seg->group_depth = layout.olo_map.odm_group_depth; + objio_seg->group_count = layout.olo_map.odm_num_comps / + objio_seg->mirrors_p1 / + objio_seg->group_width; + } else { + objio_seg->group_width = layout.olo_map.odm_num_comps / + objio_seg->mirrors_p1; + objio_seg->group_depth = -1; + objio_seg->group_count = 1; + } + + /* Cache this calculation it will hit for every page */ + objio_seg->max_io_size = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - + objio_seg->stripe_unit) * + objio_seg->group_width; + + *outp = &objio_seg->lseg; + return 0; + +err: + kfree(objio_seg); + dprintk("%s: Error: return %d\n", __func__, err); + *outp = NULL; + return err; +} + +void objio_free_lseg(struct pnfs_layout_segment *lseg) +{ + int i; + struct objio_segment *objio_seg = OBJIO_LSEG(lseg); + + for (i = 0; i < objio_seg->num_comps; i++) { + if (!objio_seg->ods[i]) + break; + nfs4_put_deviceid_node(&objio_seg->ods[i]->id_node); 
+ } + kfree(objio_seg); +} + +int objio_alloc_io_state(struct pnfs_layout_segment *lseg, + struct objlayout_io_state **outp, + gfp_t gfp_flags) +{ + struct objio_segment *objio_seg = OBJIO_LSEG(lseg); + struct objio_state *ios; + const unsigned first_size = sizeof(*ios) + + objio_seg->num_comps * sizeof(ios->per_dev[0]); + const unsigned sec_size = objio_seg->num_comps * + sizeof(ios->ol_state.ioerrs[0]); + + ios = kzalloc(first_size + sec_size, gfp_flags); + if (unlikely(!ios)) + return -ENOMEM; + + ios->layout = objio_seg; + ios->ol_state.ioerrs = ((void *)ios) + first_size; + ios->ol_state.num_comps = objio_seg->num_comps; + + *outp = &ios->ol_state; + return 0; +} + +void objio_free_io_state(struct objlayout_io_state *ol_state) +{ + struct objio_state *ios = container_of(ol_state, struct objio_state, + ol_state); + + kfree(ios); +} + +enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) +{ + switch (oep) { + case OSD_ERR_PRI_NO_ERROR: + return (enum pnfs_osd_errno)0; + + case OSD_ERR_PRI_CLEAR_PAGES: + BUG_ON(1); + return 0; + + case OSD_ERR_PRI_RESOURCE: + return PNFS_OSD_ERR_RESOURCE; + case OSD_ERR_PRI_BAD_CRED: + return PNFS_OSD_ERR_BAD_CRED; + case OSD_ERR_PRI_NO_ACCESS: + return PNFS_OSD_ERR_NO_ACCESS; + case OSD_ERR_PRI_UNREACHABLE: + return PNFS_OSD_ERR_UNREACHABLE; + case OSD_ERR_PRI_NOT_FOUND: + return PNFS_OSD_ERR_NOT_FOUND; + case OSD_ERR_PRI_NO_SPACE: + return PNFS_OSD_ERR_NO_SPACE; + default: + WARN_ON(1); + /* fallthrough */ + case OSD_ERR_PRI_EIO: + return PNFS_OSD_ERR_EIO; + } +} + +static void _clear_bio(struct bio *bio) +{ + struct bio_vec *bv; + unsigned i; + + __bio_for_each_segment(bv, bio, i, 0) { + unsigned this_count = bv->bv_len; + + if (likely(PAGE_SIZE == this_count)) + clear_highpage(bv->bv_page); + else + zero_user(bv->bv_page, bv->bv_offset, this_count); + } +} + +static int _io_check(struct objio_state *ios, bool is_write) +{ + enum osd_err_priority oep = OSD_ERR_PRI_NO_ERROR; + int lin_ret = 0; + int i; + + for (i = 
0; i < ios->numdevs; i++) { + struct osd_sense_info osi; + struct osd_request *or = ios->per_dev[i].or; + unsigned dev; + int ret; + + if (!or) + continue; + + ret = osd_req_decode_sense(or, &osi); + if (likely(!ret)) + continue; + + if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { + /* start read offset passed endof file */ + BUG_ON(is_write); + _clear_bio(ios->per_dev[i].bio); + dprintk("%s: start read offset passed end of file " + "offset=0x%llx, length=0x%lx\n", __func__, + _LLU(ios->per_dev[i].offset), + ios->per_dev[i].length); + + continue; /* we recovered */ + } + dev = ios->per_dev[i].dev; + objlayout_io_set_result(&ios->ol_state, dev, + &ios->layout->comps[dev].oc_object_id, + osd_pri_2_pnfs_err(osi.osd_err_pri), + ios->per_dev[i].offset, + ios->per_dev[i].length, + is_write); + + if (osi.osd_err_pri >= oep) { + oep = osi.osd_err_pri; + lin_ret = ret; + } + } + + return lin_ret; +} + +/* + * Common IO state helpers. + */ +static void _io_free(struct objio_state *ios) +{ + unsigned i; + + for (i = 0; i < ios->numdevs; i++) { + struct _objio_per_comp *per_dev = &ios->per_dev[i]; + + if (per_dev->or) { + osd_end_request(per_dev->or); + per_dev->or = NULL; + } + + if (per_dev->bio) { + bio_put(per_dev->bio); + per_dev->bio = NULL; + } + } +} + +struct osd_dev *_io_od(struct objio_state *ios, unsigned dev) +{ + unsigned min_dev = ios->layout->comps_index; + unsigned max_dev = min_dev + ios->layout->num_comps; + + BUG_ON(dev < min_dev || max_dev <= dev); + return ios->layout->ods[dev - min_dev]->od; +} + +struct _striping_info { + u64 obj_offset; + u64 group_length; + unsigned dev; + unsigned unit_off; +}; + +static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, + struct _striping_info *si) +{ + u32 stripe_unit = ios->layout->stripe_unit; + u32 group_width = ios->layout->group_width; + u64 group_depth = ios->layout->group_depth; + u32 U = stripe_unit * group_width; + + u64 T = U * group_depth; + u64 S = T * ios->layout->group_count; + u64 M = 
div64_u64(file_offset, S); + + /* + G = (L - (M * S)) / T + H = (L - (M * S)) % T + */ + u64 LmodU = file_offset - M * S; + u32 G = div64_u64(LmodU, T); + u64 H = LmodU - G * T; + + u32 N = div_u64(H, U); + + div_u64_rem(file_offset, stripe_unit, &si->unit_off); + si->obj_offset = si->unit_off + (N * stripe_unit) + + (M * group_depth * stripe_unit); + + /* "H - (N * U)" is just "H % U" so it's bound to u32 */ + si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width; + si->dev *= ios->layout->mirrors_p1; + + si->group_length = T - H; +} + +static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, + unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, + gfp_t gfp_flags) +{ + unsigned pg = *cur_pg; + struct request_queue *q = + osd_request_queue(_io_od(ios, per_dev->dev)); + + per_dev->length += cur_len; + + if (per_dev->bio == NULL) { + unsigned stripes = ios->layout->num_comps / + ios->layout->mirrors_p1; + unsigned pages_in_stripe = stripes * + (ios->layout->stripe_unit / PAGE_SIZE); + unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / + stripes; + + if (BIO_MAX_PAGES_KMALLOC < bio_size) + bio_size = BIO_MAX_PAGES_KMALLOC; + + per_dev->bio = bio_kmalloc(gfp_flags, bio_size); + if (unlikely(!per_dev->bio)) { + dprintk("Faild to allocate BIO size=%u\n", bio_size); + return -ENOMEM; + } + } + + while (cur_len > 0) { + unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len); + unsigned added_len; + + BUG_ON(ios->ol_state.nr_pages <= pg); + cur_len -= pglen; + + added_len = bio_add_pc_page(q, per_dev->bio, + ios->ol_state.pages[pg], pglen, pgbase); + if (unlikely(pglen != added_len)) + return -ENOMEM; + pgbase = 0; + ++pg; + } + BUG_ON(cur_len); + + *cur_pg = pg; + return 0; +} + +static int _prepare_one_group(struct objio_state *ios, u64 length, + struct _striping_info *si, unsigned *last_pg, + gfp_t gfp_flags) +{ + unsigned stripe_unit = ios->layout->stripe_unit; + unsigned mirrors_p1 = ios->layout->mirrors_p1; + 
unsigned devs_in_group = ios->layout->group_width * mirrors_p1; + unsigned dev = si->dev; + unsigned first_dev = dev - (dev % devs_in_group); + unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0; + unsigned cur_pg = *last_pg; + int ret = 0; + + while (length) { + struct _objio_per_comp *per_dev = &ios->per_dev[dev]; + unsigned cur_len, page_off = 0; + + if (!per_dev->length) { + per_dev->dev = dev; + if (dev < si->dev) { + per_dev->offset = si->obj_offset + stripe_unit - + si->unit_off; + cur_len = stripe_unit; + } else if (dev == si->dev) { + per_dev->offset = si->obj_offset; + cur_len = stripe_unit - si->unit_off; + page_off = si->unit_off & ~PAGE_MASK; + BUG_ON(page_off && + (page_off != ios->ol_state.pgbase)); + } else { /* dev > si->dev */ + per_dev->offset = si->obj_offset - si->unit_off; + cur_len = stripe_unit; + } + + if (max_comp < dev) + max_comp = dev; + } else { + cur_len = stripe_unit; + } + if (cur_len >= length) + cur_len = length; + + ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev, + cur_len, gfp_flags); + if (unlikely(ret)) + goto out; + + dev += mirrors_p1; + dev = (dev % devs_in_group) + first_dev; + + length -= cur_len; + ios->length += cur_len; + } +out: + ios->numdevs = max_comp + mirrors_p1; + *last_pg = cur_pg; + return ret; +} + +static int _io_rw_pagelist(struct objio_state *ios, gfp_t gfp_flags) +{ + u64 length = ios->ol_state.count; + u64 offset = ios->ol_state.offset; + struct _striping_info si; + unsigned last_pg = 0; + int ret = 0; + + while (length) { + _calc_stripe_info(ios, offset, &si); + + if (length < si.group_length) + si.group_length = length; + + ret = _prepare_one_group(ios, si.group_length, &si, &last_pg, gfp_flags); + if (unlikely(ret)) + goto out; + + offset += si.group_length; + length -= si.group_length; + } + +out: + if (!ios->length) + return ret; + + return 0; +} + +static ssize_t _sync_done(struct objio_state *ios) +{ + struct completion *waiting = ios->private; + + complete(waiting); + 
return 0; +} + +static void _last_io(struct kref *kref) +{ + struct objio_state *ios = container_of(kref, struct objio_state, kref); + + ios->done(ios); +} + +static void _done_io(struct osd_request *or, void *p) +{ + struct objio_state *ios = p; + + kref_put(&ios->kref, _last_io); +} + +static ssize_t _io_exec(struct objio_state *ios) +{ + DECLARE_COMPLETION_ONSTACK(wait); + ssize_t status = 0; /* sync status */ + unsigned i; + objio_done_fn saved_done_fn = ios->done; + bool sync = ios->ol_state.sync; + + if (sync) { + ios->done = _sync_done; + ios->private = &wait; + } + + kref_init(&ios->kref); + + for (i = 0; i < ios->numdevs; i++) { + struct osd_request *or = ios->per_dev[i].or; + + if (!or) + continue; + + kref_get(&ios->kref); + osd_execute_request_async(or, _done_io, ios); + } + + kref_put(&ios->kref, _last_io); + + if (sync) { + wait_for_completion(&wait); + status = saved_done_fn(ios); + } + + return status; +} + +/* + * read + */ +static ssize_t _read_done(struct objio_state *ios) +{ + ssize_t status; + int ret = _io_check(ios, false); + + _io_free(ios); + + if (likely(!ret)) + status = ios->length; + else + status = ret; + + objlayout_read_done(&ios->ol_state, status, ios->ol_state.sync); + return status; +} + +static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) +{ + struct osd_request *or = NULL; + struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; + unsigned dev = per_dev->dev; + struct pnfs_osd_object_cred *cred = + &ios->layout->comps[dev]; + struct osd_obj_id obj = { + .partition = cred->oc_object_id.oid_partition_id, + .id = cred->oc_object_id.oid_object_id, + }; + int ret; + + or = osd_start_request(_io_od(ios, dev), GFP_KERNEL); + if (unlikely(!or)) { + ret = -ENOMEM; + goto err; + } + per_dev->or = or; + + osd_req_read(or, &obj, per_dev->offset, per_dev->bio, per_dev->length); + + ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); + if (ret) { + dprintk("%s: Faild to osd_finalize_request() => %d\n", + __func__, 
ret); + goto err; + } + + dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", + __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), + per_dev->length); + +err: + return ret; +} + +static ssize_t _read_exec(struct objio_state *ios) +{ + unsigned i; + int ret; + + for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { + if (!ios->per_dev[i].length) + continue; + ret = _read_mirrors(ios, i); + if (unlikely(ret)) + goto err; + } + + ios->done = _read_done; + return _io_exec(ios); /* In sync mode exec returns the io status */ + +err: + _io_free(ios); + return ret; +} + +ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state) +{ + struct objio_state *ios = container_of(ol_state, struct objio_state, + ol_state); + int ret; + + ret = _io_rw_pagelist(ios, GFP_KERNEL); + if (unlikely(ret)) + return ret; + + return _read_exec(ios); +} + +/* + * write + */ +static ssize_t _write_done(struct objio_state *ios) +{ + ssize_t status; + int ret = _io_check(ios, true); + + _io_free(ios); + + if (likely(!ret)) { + /* FIXME: should be based on the OSD's persistence model + * See OSD2r05 Section 4.13 Data persistence model */ + ios->ol_state.committed = NFS_FILE_SYNC; + status = ios->length; + } else { + status = ret; + } + + objlayout_write_done(&ios->ol_state, status, ios->ol_state.sync); + return status; +} + +static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) +{ + struct _objio_per_comp *master_dev = &ios->per_dev[cur_comp]; + unsigned dev = ios->per_dev[cur_comp].dev; + unsigned last_comp = cur_comp + ios->layout->mirrors_p1; + int ret; + + for (; cur_comp < last_comp; ++cur_comp, ++dev) { + struct osd_request *or = NULL; + struct pnfs_osd_object_cred *cred = + &ios->layout->comps[dev]; + struct osd_obj_id obj = { + .partition = cred->oc_object_id.oid_partition_id, + .id = cred->oc_object_id.oid_object_id, + }; + struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; + struct bio *bio; + + or = osd_start_request(_io_od(ios, 
dev), GFP_NOFS); + if (unlikely(!or)) { + ret = -ENOMEM; + goto err; + } + per_dev->or = or; + + if (per_dev != master_dev) { + bio = bio_kmalloc(GFP_NOFS, + master_dev->bio->bi_max_vecs); + if (unlikely(!bio)) { + dprintk("Faild to allocate BIO size=%u\n", + master_dev->bio->bi_max_vecs); + ret = -ENOMEM; + goto err; + } + + __bio_clone(bio, master_dev->bio); + bio->bi_bdev = NULL; + bio->bi_next = NULL; + per_dev->bio = bio; + per_dev->dev = dev; + per_dev->length = master_dev->length; + per_dev->offset = master_dev->offset; + } else { + bio = master_dev->bio; + bio->bi_rw |= REQ_WRITE; + } + + osd_req_write(or, &obj, per_dev->offset, bio, per_dev->length); + + ret = osd_finalize_request(or, 0, cred->oc_cap.cred, NULL); + if (ret) { + dprintk("%s: Faild to osd_finalize_request() => %d\n", + __func__, ret); + goto err; + } + + dprintk("%s:[%d] dev=%d obj=0x%llx start=0x%llx length=0x%lx\n", + __func__, cur_comp, dev, obj.id, _LLU(per_dev->offset), + per_dev->length); + } + +err: + return ret; +} + +static ssize_t _write_exec(struct objio_state *ios) +{ + unsigned i; + int ret; + + for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) { + if (!ios->per_dev[i].length) + continue; + ret = _write_mirrors(ios, i); + if (unlikely(ret)) + goto err; + } + + ios->done = _write_done; + return _io_exec(ios); /* In sync mode exec returns the io->status */ + +err: + _io_free(ios); + return ret; +} + +ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, bool stable) +{ + struct objio_state *ios = container_of(ol_state, struct objio_state, + ol_state); + int ret; + + /* TODO: ios->stable = stable; */ + ret = _io_rw_pagelist(ios, GFP_NOFS); + if (unlikely(ret)) + return ret; + + return _write_exec(ios); +} + +static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, + struct nfs_page *prev, struct nfs_page *req) +{ + if (!pnfs_generic_pg_test(pgio, prev, req)) + return false; + + return pgio->pg_count + req->wb_bytes <= + 
OBJIO_LSEG(pgio->pg_lseg)->max_io_size; +} + +static struct pnfs_layoutdriver_type objlayout_type = { + .id = LAYOUT_OSD2_OBJECTS, + .name = "LAYOUT_OSD2_OBJECTS", + .flags = PNFS_LAYOUTRET_ON_SETATTR, + + .alloc_layout_hdr = objlayout_alloc_layout_hdr, + .free_layout_hdr = objlayout_free_layout_hdr, + + .alloc_lseg = objlayout_alloc_lseg, + .free_lseg = objlayout_free_lseg, + + .read_pagelist = objlayout_read_pagelist, + .write_pagelist = objlayout_write_pagelist, + .pg_test = objio_pg_test, + + .free_deviceid_node = objio_free_deviceid_node, + + .encode_layoutcommit = objlayout_encode_layoutcommit, + .encode_layoutreturn = objlayout_encode_layoutreturn, +}; + +MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); +MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>"); +MODULE_LICENSE("GPL"); + +static int __init +objlayout_init(void) +{ + int ret = pnfs_register_layoutdriver(&objlayout_type); + + if (ret) + printk(KERN_INFO + "%s: Registering OSD pNFS Layout Driver failed: error=%d\n", + __func__, ret); + else + printk(KERN_INFO "%s: Registered OSD pNFS Layout Driver\n", + __func__); + return ret; +} + +static void __exit +objlayout_exit(void) +{ + pnfs_unregister_layoutdriver(&objlayout_type); + printk(KERN_INFO "%s: Unregistered OSD pNFS Layout Driver\n", + __func__); +} + +module_init(objlayout_init); +module_exit(objlayout_exit); diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c new file mode 100644 index 00000000000..dc3956c0de8 --- /dev/null +++ b/fs/nfs/objlayout/objlayout.c @@ -0,0 +1,712 @@ +/* + * pNFS Objects layout driver high level definitions + * + * Copyright (C) 2007 Panasas Inc. [year of first publication] + * All rights reserved. 
+ * + * Benny Halevy <bhalevy@panasas.com> + * Boaz Harrosh <bharrosh@panasas.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <scsi/osd_initiator.h> +#include "objlayout.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD +/* + * Create a objlayout layout structure for the given inode and return it. 
+ */ +struct pnfs_layout_hdr * +objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) +{ + struct objlayout *objlay; + + objlay = kzalloc(sizeof(struct objlayout), gfp_flags); + if (objlay) { + spin_lock_init(&objlay->lock); + INIT_LIST_HEAD(&objlay->err_list); + } + dprintk("%s: Return %p\n", __func__, objlay); + return &objlay->pnfs_layout; +} + +/* + * Free an objlayout layout structure + */ +void +objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo) +{ + struct objlayout *objlay = OBJLAYOUT(lo); + + dprintk("%s: objlay %p\n", __func__, objlay); + + WARN_ON(!list_empty(&objlay->err_list)); + kfree(objlay); +} + +/* + * Unmarshall layout and store it in pnfslay. + */ +struct pnfs_layout_segment * +objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay, + struct nfs4_layoutget_res *lgr, + gfp_t gfp_flags) +{ + int status = -ENOMEM; + struct xdr_stream stream; + struct xdr_buf buf = { + .pages = lgr->layoutp->pages, + .page_len = lgr->layoutp->len, + .buflen = lgr->layoutp->len, + .len = lgr->layoutp->len, + }; + struct page *scratch; + struct pnfs_layout_segment *lseg; + + dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay); + + scratch = alloc_page(gfp_flags); + if (!scratch) + goto err_nofree; + + xdr_init_decode(&stream, &buf, NULL); + xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); + + status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags); + if (unlikely(status)) { + dprintk("%s: objio_alloc_lseg Return err %d\n", __func__, + status); + goto err; + } + + __free_page(scratch); + + dprintk("%s: Return %p\n", __func__, lseg); + return lseg; + +err: + __free_page(scratch); +err_nofree: + dprintk("%s: Err Return=>%d\n", __func__, status); + return ERR_PTR(status); +} + +/* + * Free a layout segement + */ +void +objlayout_free_lseg(struct pnfs_layout_segment *lseg) +{ + dprintk("%s: freeing layout segment %p\n", __func__, lseg); + + if (unlikely(!lseg)) + return; + + objio_free_lseg(lseg); +} + +/* + * I/O 
Operations + */ +static inline u64 +end_offset(u64 start, u64 len) +{ + u64 end; + + end = start + len; + return end >= start ? end : NFS4_MAX_UINT64; +} + +/* last octet in a range */ +static inline u64 +last_byte_offset(u64 start, u64 len) +{ + u64 end; + + BUG_ON(!len); + end = start + len; + return end > start ? end - 1 : NFS4_MAX_UINT64; +} + +static struct objlayout_io_state * +objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, + struct page **pages, + unsigned pgbase, + loff_t offset, + size_t count, + struct pnfs_layout_segment *lseg, + void *rpcdata, + gfp_t gfp_flags) +{ + struct objlayout_io_state *state; + u64 lseg_end_offset; + + dprintk("%s: allocating io_state\n", __func__); + if (objio_alloc_io_state(lseg, &state, gfp_flags)) + return NULL; + + BUG_ON(offset < lseg->pls_range.offset); + lseg_end_offset = end_offset(lseg->pls_range.offset, + lseg->pls_range.length); + BUG_ON(offset >= lseg_end_offset); + if (offset + count > lseg_end_offset) { + count = lseg->pls_range.length - + (offset - lseg->pls_range.offset); + dprintk("%s: truncated count %Zd\n", __func__, count); + } + + if (pgbase > PAGE_SIZE) { + pages += pgbase >> PAGE_SHIFT; + pgbase &= ~PAGE_MASK; + } + + INIT_LIST_HEAD(&state->err_list); + state->lseg = lseg; + state->rpcdata = rpcdata; + state->pages = pages; + state->pgbase = pgbase; + state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT; + state->offset = offset; + state->count = count; + state->sync = 0; + + return state; +} + +static void +objlayout_free_io_state(struct objlayout_io_state *state) +{ + dprintk("%s: freeing io_state\n", __func__); + if (unlikely(!state)) + return; + + objio_free_io_state(state); +} + +/* + * I/O done common code + */ +static void +objlayout_iodone(struct objlayout_io_state *state) +{ + dprintk("%s: state %p status\n", __func__, state); + + if (likely(state->status >= 0)) { + objlayout_free_io_state(state); + } else { + struct objlayout *objlay = 
OBJLAYOUT(state->lseg->pls_layout); + + spin_lock(&objlay->lock); + objlay->delta_space_valid = OBJ_DSU_INVALID; + list_add(&objlay->err_list, &state->err_list); + spin_unlock(&objlay->lock); + } +} + +/* + * objlayout_io_set_result - Set an osd_error code on a specific osd comp. + * + * The @index component IO failed (error returned from target). Register + * the error for later reporting at layout-return. + */ +void +objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, + struct pnfs_osd_objid *pooid, int osd_error, + u64 offset, u64 length, bool is_write) +{ + struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index]; + + BUG_ON(index >= state->num_comps); + if (osd_error) { + ioerr->oer_component = *pooid; + ioerr->oer_comp_offset = offset; + ioerr->oer_comp_length = length; + ioerr->oer_iswrite = is_write; + ioerr->oer_errno = osd_error; + + dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) " + "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n", + __func__, index, ioerr->oer_errno, + ioerr->oer_iswrite, + _DEVID_LO(&ioerr->oer_component.oid_device_id), + _DEVID_HI(&ioerr->oer_component.oid_device_id), + ioerr->oer_component.oid_partition_id, + ioerr->oer_component.oid_object_id, + ioerr->oer_comp_offset, + ioerr->oer_comp_length); + } else { + /* User need not call if no error is reported */ + ioerr->oer_errno = 0; + } +} + +/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete(). 
+ * This is because the osd completion is called with ints-off from + * the block layer + */ +static void _rpc_read_complete(struct work_struct *work) +{ + struct rpc_task *task; + struct nfs_read_data *rdata; + + dprintk("%s enter\n", __func__); + task = container_of(work, struct rpc_task, u.tk_work); + rdata = container_of(task, struct nfs_read_data, task); + + pnfs_ld_read_done(rdata); +} + +void +objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) +{ + int eof = state->eof; + struct nfs_read_data *rdata; + + state->status = status; + dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof); + rdata = state->rpcdata; + rdata->task.tk_status = status; + if (status >= 0) { + rdata->res.count = status; + rdata->res.eof = eof; + } + objlayout_iodone(state); + /* must not use state after this point */ + + if (sync) + pnfs_ld_read_done(rdata); + else { + INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); + schedule_work(&rdata->task.u.tk_work); + } +} + +/* + * Perform sync or async reads. 
+ */ +enum pnfs_try_status +objlayout_read_pagelist(struct nfs_read_data *rdata) +{ + loff_t offset = rdata->args.offset; + size_t count = rdata->args.count; + struct objlayout_io_state *state; + ssize_t status = 0; + loff_t eof; + + dprintk("%s: Begin inode %p offset %llu count %d\n", + __func__, rdata->inode, offset, (int)count); + + eof = i_size_read(rdata->inode); + if (unlikely(offset + count > eof)) { + if (offset >= eof) { + status = 0; + rdata->res.count = 0; + rdata->res.eof = 1; + goto out; + } + count = eof - offset; + } + + state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout, + rdata->args.pages, rdata->args.pgbase, + offset, count, + rdata->lseg, rdata, + GFP_KERNEL); + if (unlikely(!state)) { + status = -ENOMEM; + goto out; + } + + state->eof = state->offset + state->count >= eof; + + status = objio_read_pagelist(state); + out: + dprintk("%s: Return status %Zd\n", __func__, status); + rdata->pnfs_error = status; + return PNFS_ATTEMPTED; +} + +/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete(). 
+ * This is because the osd completion is called with ints-off from + * the block layer + */ +static void _rpc_write_complete(struct work_struct *work) +{ + struct rpc_task *task; + struct nfs_write_data *wdata; + + dprintk("%s enter\n", __func__); + task = container_of(work, struct rpc_task, u.tk_work); + wdata = container_of(task, struct nfs_write_data, task); + + pnfs_ld_write_done(wdata); +} + +void +objlayout_write_done(struct objlayout_io_state *state, ssize_t status, + bool sync) +{ + struct nfs_write_data *wdata; + + dprintk("%s: Begin\n", __func__); + wdata = state->rpcdata; + state->status = status; + wdata->task.tk_status = status; + if (status >= 0) { + wdata->res.count = status; + wdata->verf.committed = state->committed; + dprintk("%s: Return status %d committed %d\n", + __func__, wdata->task.tk_status, + wdata->verf.committed); + } else + dprintk("%s: Return status %d\n", + __func__, wdata->task.tk_status); + objlayout_iodone(state); + /* must not use state after this point */ + + if (sync) + pnfs_ld_write_done(wdata); + else { + INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); + schedule_work(&wdata->task.u.tk_work); + } +} + +/* + * Perform sync or async writes. 
+ */ +enum pnfs_try_status +objlayout_write_pagelist(struct nfs_write_data *wdata, + int how) +{ + struct objlayout_io_state *state; + ssize_t status; + + dprintk("%s: Begin inode %p offset %llu count %u\n", + __func__, wdata->inode, wdata->args.offset, wdata->args.count); + + state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout, + wdata->args.pages, + wdata->args.pgbase, + wdata->args.offset, + wdata->args.count, + wdata->lseg, wdata, + GFP_NOFS); + if (unlikely(!state)) { + status = -ENOMEM; + goto out; + } + + state->sync = how & FLUSH_SYNC; + + status = objio_write_pagelist(state, how & FLUSH_STABLE); + out: + dprintk("%s: Return status %Zd\n", __func__, status); + wdata->pnfs_error = status; + return PNFS_ATTEMPTED; +} + +void +objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay, + struct xdr_stream *xdr, + const struct nfs4_layoutcommit_args *args) +{ + struct objlayout *objlay = OBJLAYOUT(pnfslay); + struct pnfs_osd_layoutupdate lou; + __be32 *start; + + dprintk("%s: Begin\n", __func__); + + spin_lock(&objlay->lock); + lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID); + lou.dsu_delta = objlay->delta_space_used; + objlay->delta_space_used = 0; + objlay->delta_space_valid = OBJ_DSU_INIT; + lou.olu_ioerr_flag = !list_empty(&objlay->err_list); + spin_unlock(&objlay->lock); + + start = xdr_reserve_space(xdr, 4); + + BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou)); + + *start = cpu_to_be32((xdr->p - start - 1) * 4); + + dprintk("%s: Return delta_space_used %lld err %d\n", __func__, + lou.dsu_delta, lou.olu_ioerr_flag); +} + +static int +err_prio(u32 oer_errno) +{ + switch (oer_errno) { + case 0: + return 0; + + case PNFS_OSD_ERR_RESOURCE: + return OSD_ERR_PRI_RESOURCE; + case PNFS_OSD_ERR_BAD_CRED: + return OSD_ERR_PRI_BAD_CRED; + case PNFS_OSD_ERR_NO_ACCESS: + return OSD_ERR_PRI_NO_ACCESS; + case PNFS_OSD_ERR_UNREACHABLE: + return OSD_ERR_PRI_UNREACHABLE; + case PNFS_OSD_ERR_NOT_FOUND: + return OSD_ERR_PRI_NOT_FOUND; + case 
PNFS_OSD_ERR_NO_SPACE: + return OSD_ERR_PRI_NO_SPACE; + default: + WARN_ON(1); + /* fallthrough */ + case PNFS_OSD_ERR_EIO: + return OSD_ERR_PRI_EIO; + } +} + +static void +merge_ioerr(struct pnfs_osd_ioerr *dest_err, + const struct pnfs_osd_ioerr *src_err) +{ + u64 dest_end, src_end; + + if (!dest_err->oer_errno) { + *dest_err = *src_err; + /* accumulated device must be blank */ + memset(&dest_err->oer_component.oid_device_id, 0, + sizeof(dest_err->oer_component.oid_device_id)); + + return; + } + + if (dest_err->oer_component.oid_partition_id != + src_err->oer_component.oid_partition_id) + dest_err->oer_component.oid_partition_id = 0; + + if (dest_err->oer_component.oid_object_id != + src_err->oer_component.oid_object_id) + dest_err->oer_component.oid_object_id = 0; + + if (dest_err->oer_comp_offset > src_err->oer_comp_offset) + dest_err->oer_comp_offset = src_err->oer_comp_offset; + + dest_end = end_offset(dest_err->oer_comp_offset, + dest_err->oer_comp_length); + src_end = end_offset(src_err->oer_comp_offset, + src_err->oer_comp_length); + if (dest_end < src_end) + dest_end = src_end; + + dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset; + + if ((src_err->oer_iswrite == dest_err->oer_iswrite) && + (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) { + dest_err->oer_errno = src_err->oer_errno; + } else if (src_err->oer_iswrite) { + dest_err->oer_iswrite = true; + dest_err->oer_errno = src_err->oer_errno; + } +} + +static void +encode_accumulated_error(struct objlayout *objlay, __be32 *p) +{ + struct objlayout_io_state *state, *tmp; + struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; + + list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { + unsigned i; + + for (i = 0; i < state->num_comps; i++) { + struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; + + if (!ioerr->oer_errno) + continue; + + printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " + "dev(%llx:%llx) par=0x%llx obj=0x%llx " + "offset=0x%llx 
length=0x%llx\n", + __func__, i, ioerr->oer_errno, + ioerr->oer_iswrite, + _DEVID_LO(&ioerr->oer_component.oid_device_id), + _DEVID_HI(&ioerr->oer_component.oid_device_id), + ioerr->oer_component.oid_partition_id, + ioerr->oer_component.oid_object_id, + ioerr->oer_comp_offset, + ioerr->oer_comp_length); + + merge_ioerr(&accumulated_err, ioerr); + } + list_del(&state->err_list); + objlayout_free_io_state(state); + } + + pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); +} + +void +objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, + struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args) +{ + struct objlayout *objlay = OBJLAYOUT(pnfslay); + struct objlayout_io_state *state, *tmp; + __be32 *start; + + dprintk("%s: Begin\n", __func__); + start = xdr_reserve_space(xdr, 4); + BUG_ON(!start); + + spin_lock(&objlay->lock); + + list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { + __be32 *last_xdr = NULL, *p; + unsigned i; + int res = 0; + + for (i = 0; i < state->num_comps; i++) { + struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; + + if (!ioerr->oer_errno) + continue; + + dprintk("%s: err[%d]: errno=%d is_write=%d " + "dev(%llx:%llx) par=0x%llx obj=0x%llx " + "offset=0x%llx length=0x%llx\n", + __func__, i, ioerr->oer_errno, + ioerr->oer_iswrite, + _DEVID_LO(&ioerr->oer_component.oid_device_id), + _DEVID_HI(&ioerr->oer_component.oid_device_id), + ioerr->oer_component.oid_partition_id, + ioerr->oer_component.oid_object_id, + ioerr->oer_comp_offset, + ioerr->oer_comp_length); + + p = pnfs_osd_xdr_ioerr_reserve_space(xdr); + if (unlikely(!p)) { + res = -E2BIG; + break; /* accumulated_error */ + } + + last_xdr = p; + pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]); + } + + /* TODO: use xdr_write_pages */ + if (unlikely(res)) { + /* no space for even one error descriptor */ + BUG_ON(!last_xdr); + + /* we've encountered a situation with lots and lots of + * errors and no space to encode them all. 
Use the last + * available slot to report the union of all the + * remaining errors. + */ + encode_accumulated_error(objlay, last_xdr); + goto loop_done; + } + list_del(&state->err_list); + objlayout_free_io_state(state); + } +loop_done: + spin_unlock(&objlay->lock); + + *start = cpu_to_be32((xdr->p - start - 1) * 4); + dprintk("%s: Return\n", __func__); +} + + +/* + * Get Device Info API for io engines + */ +struct objlayout_deviceinfo { + struct page *page; + struct pnfs_osd_deviceaddr da; /* This must be last */ +}; + +/* Initialize and call nfs_getdeviceinfo, then decode and return a + * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo() + * should be called. + */ +int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, + struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, + gfp_t gfp_flags) +{ + struct objlayout_deviceinfo *odi; + struct pnfs_device pd; + struct super_block *sb; + struct page *page, **pages; + u32 *p; + int err; + + page = alloc_page(gfp_flags); + if (!page) + return -ENOMEM; + + pages = &page; + pd.pages = pages; + + memcpy(&pd.dev_id, d_id, sizeof(*d_id)); + pd.layout_type = LAYOUT_OSD2_OBJECTS; + pd.pages = &page; + pd.pgbase = 0; + pd.pglen = PAGE_SIZE; + pd.mincount = 0; + + sb = pnfslay->plh_inode->i_sb; + err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd); + dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); + if (err) + goto err_out; + + p = page_address(page); + odi = kzalloc(sizeof(*odi), gfp_flags); + if (!odi) { + err = -ENOMEM; + goto err_out; + } + pnfs_osd_xdr_decode_deviceaddr(&odi->da, p); + odi->page = page; + *deviceaddr = &odi->da; + return 0; + +err_out: + __free_page(page); + return err; +} + +void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) +{ + struct objlayout_deviceinfo *odi = container_of(deviceaddr, + struct objlayout_deviceinfo, + da); + + __free_page(odi->page); + kfree(odi); +} diff --git a/fs/nfs/objlayout/objlayout.h 
b/fs/nfs/objlayout/objlayout.h new file mode 100644 index 00000000000..a8244c8e042 --- /dev/null +++ b/fs/nfs/objlayout/objlayout.h @@ -0,0 +1,187 @@ +/* + * Data types and function declerations for interfacing with the + * pNFS standard object layout driver. + * + * Copyright (C) 2007 Panasas Inc. [year of first publication] + * All rights reserved. + * + * Benny Halevy <bhalevy@panasas.com> + * Boaz Harrosh <bharrosh@panasas.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _OBJLAYOUT_H +#define _OBJLAYOUT_H + +#include <linux/nfs_fs.h> +#include <linux/pnfs_osd_xdr.h> +#include "../pnfs.h" + +/* + * per-inode layout + */ +struct objlayout { + struct pnfs_layout_hdr pnfs_layout; + + /* for layout_commit */ + enum osd_delta_space_valid_enum { + OBJ_DSU_INIT = 0, + OBJ_DSU_VALID, + OBJ_DSU_INVALID, + } delta_space_valid; + s64 delta_space_used; /* consumed by write ops */ + + /* for layout_return */ + spinlock_t lock; + struct list_head err_list; +}; + +static inline struct objlayout * +OBJLAYOUT(struct pnfs_layout_hdr *lo) +{ + return container_of(lo, struct objlayout, pnfs_layout); +} + +/* + * per-I/O operation state + * embedded in objects provider io_state data structure + */ +struct objlayout_io_state { + struct pnfs_layout_segment *lseg; + + struct page **pages; + unsigned pgbase; + unsigned nr_pages; + unsigned long count; + loff_t offset; + bool sync; + + void *rpcdata; + int status; /* res */ + int eof; /* res */ + int committed; /* res */ + + /* Error reporting (layout_return) */ + struct list_head err_list; + unsigned num_comps; + /* Pointer to array of error descriptors of size num_comps. + * It should contain as many entries as devices in the osd_layout + * that participate in the I/O. It is up to the io_engine to allocate + * needed space and set num_comps. 
+ */ + struct pnfs_osd_ioerr *ioerrs; +}; + +/* + * Raid engine I/O API + */ +extern int objio_alloc_lseg(struct pnfs_layout_segment **outp, + struct pnfs_layout_hdr *pnfslay, + struct pnfs_layout_range *range, + struct xdr_stream *xdr, + gfp_t gfp_flags); +extern void objio_free_lseg(struct pnfs_layout_segment *lseg); + +extern int objio_alloc_io_state( + struct pnfs_layout_segment *lseg, + struct objlayout_io_state **outp, + gfp_t gfp_flags); +extern void objio_free_io_state(struct objlayout_io_state *state); + +extern ssize_t objio_read_pagelist(struct objlayout_io_state *ol_state); +extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, + bool stable); + +/* + * callback API + */ +extern void objlayout_io_set_result(struct objlayout_io_state *state, + unsigned index, struct pnfs_osd_objid *pooid, + int osd_error, u64 offset, u64 length, bool is_write); + +static inline void +objlayout_add_delta_space_used(struct objlayout_io_state *state, s64 space_used) +{ + struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); + + /* If one of the I/Os errored out and the delta_space_used was + * invalid we render the complete report as invalid. Protocol mandate + * the DSU be accurate or not reported. 
+ */ + spin_lock(&objlay->lock); + if (objlay->delta_space_valid != OBJ_DSU_INVALID) { + objlay->delta_space_valid = OBJ_DSU_VALID; + objlay->delta_space_used += space_used; + } + spin_unlock(&objlay->lock); +} + +extern void objlayout_read_done(struct objlayout_io_state *state, + ssize_t status, bool sync); +extern void objlayout_write_done(struct objlayout_io_state *state, + ssize_t status, bool sync); + +extern int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, + struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, + gfp_t gfp_flags); +extern void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr); + +/* + * exported generic objects function vectors + */ + +extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags); +extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *); + +extern struct pnfs_layout_segment *objlayout_alloc_lseg( + struct pnfs_layout_hdr *, + struct nfs4_layoutget_res *, + gfp_t gfp_flags); +extern void objlayout_free_lseg(struct pnfs_layout_segment *); + +extern enum pnfs_try_status objlayout_read_pagelist( + struct nfs_read_data *); + +extern enum pnfs_try_status objlayout_write_pagelist( + struct nfs_write_data *, + int how); + +extern void objlayout_encode_layoutcommit( + struct pnfs_layout_hdr *, + struct xdr_stream *, + const struct nfs4_layoutcommit_args *); + +extern void objlayout_encode_layoutreturn( + struct pnfs_layout_hdr *, + struct xdr_stream *, + const struct nfs4_layoutreturn_args *); + +#endif /* _OBJLAYOUT_H */ diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c new file mode 100644 index 00000000000..16fc758e912 --- /dev/null +++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c @@ -0,0 +1,412 @@ +/* + * Object-Based pNFS Layout XDR layer + * + * Copyright (C) 2007 Panasas Inc. [year of first publication] + * All rights reserved. 
+ * + * Benny Halevy <bhalevy@panasas.com> + * Boaz Harrosh <bharrosh@panasas.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <linux/pnfs_osd_xdr.h> + +#define NFSDBG_FACILITY NFSDBG_PNFS_LD + +/* + * The following implementation is based on RFC5664 + */ + +/* + * struct pnfs_osd_objid { + * struct nfs4_deviceid oid_device_id; + * u64 oid_partition_id; + * u64 oid_object_id; + * }; // xdr size 32 bytes + */ +static __be32 * +_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid) +{ + p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data, + sizeof(objid->oid_device_id.data)); + + p = xdr_decode_hyper(p, &objid->oid_partition_id); + p = xdr_decode_hyper(p, &objid->oid_object_id); + return p; +} +/* + * struct pnfs_osd_opaque_cred { + * u32 cred_len; + * void *cred; + * }; // xdr size [variable] + * The return pointers are from the xdr buffer + */ +static int +_osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred, + struct xdr_stream *xdr) +{ + __be32 *p = xdr_inline_decode(xdr, 1); + + if (!p) + return -EINVAL; + + opaque_cred->cred_len = be32_to_cpu(*p++); + + p = xdr_inline_decode(xdr, opaque_cred->cred_len); + if (!p) + return -EINVAL; + + opaque_cred->cred = p; + return 0; +} + +/* + * struct pnfs_osd_object_cred { + * struct pnfs_osd_objid oc_object_id; + * u32 oc_osd_version; + * u32 oc_cap_key_sec; + * struct pnfs_osd_opaque_cred oc_cap_key + * struct pnfs_osd_opaque_cred oc_cap; + * }; // xdr size 32 + 4 + 4 + [variable] + [variable] + */ +static int +_osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp, + struct xdr_stream *xdr) +{ + __be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4); + int ret; + + if (!p) + return -EIO; + + p = _osd_xdr_decode_objid(p, &comp->oc_object_id); + comp->oc_osd_version = be32_to_cpup(p++); + comp->oc_cap_key_sec = be32_to_cpup(p); + + ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr); + if (unlikely(ret)) + return ret; + + ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr); + return ret; +} + +/* + * struct pnfs_osd_data_map { + * u32 odm_num_comps; + * u64 odm_stripe_unit; + * u32 
odm_group_width; + * u32 odm_group_depth; + * u32 odm_mirror_cnt; + * u32 odm_raid_algorithm; + * }; // xdr size 4 + 8 + 4 + 4 + 4 + 4 + */ +static inline int +_osd_data_map_xdr_sz(void) +{ + return 4 + 8 + 4 + 4 + 4 + 4; +} + +static __be32 * +_osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map) +{ + data_map->odm_num_comps = be32_to_cpup(p++); + p = xdr_decode_hyper(p, &data_map->odm_stripe_unit); + data_map->odm_group_width = be32_to_cpup(p++); + data_map->odm_group_depth = be32_to_cpup(p++); + data_map->odm_mirror_cnt = be32_to_cpup(p++); + data_map->odm_raid_algorithm = be32_to_cpup(p++); + dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u " + "odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n", + __func__, + data_map->odm_num_comps, + (unsigned long long)data_map->odm_stripe_unit, + data_map->odm_group_width, + data_map->odm_group_depth, + data_map->odm_mirror_cnt, + data_map->odm_raid_algorithm); + return p; +} + +int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr) +{ + __be32 *p; + + memset(iter, 0, sizeof(*iter)); + + p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4); + if (unlikely(!p)) + return -EINVAL; + + p = _osd_xdr_decode_data_map(p, &layout->olo_map); + layout->olo_comps_index = be32_to_cpup(p++); + layout->olo_num_comps = be32_to_cpup(p++); + iter->total_comps = layout->olo_num_comps; + return 0; +} + +bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, + int *err) +{ + BUG_ON(iter->decoded_comps > iter->total_comps); + if (iter->decoded_comps == iter->total_comps) + return false; + + *err = _osd_xdr_decode_object_cred(comp, xdr); + if (unlikely(*err)) { + dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d " + "total_comps=%d\n", __func__, *err, + iter->decoded_comps, iter->total_comps); + return false; 
/* stop the loop */ + } + dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx " + "key_len=%u cap_len=%u\n", + __func__, + _DEVID_LO(&comp->oc_object_id.oid_device_id), + _DEVID_HI(&comp->oc_object_id.oid_device_id), + comp->oc_object_id.oid_partition_id, + comp->oc_object_id.oid_object_id, + comp->oc_cap_key.cred_len, comp->oc_cap.cred_len); + + iter->decoded_comps++; + return true; +} + +/* + * Get Device Information Decoding + * + * Note: since Device Information is currently done synchronously, all + * variable strings fields are left inside the rpc buffer and are only + * pointed to by the pnfs_osd_deviceaddr members. So the read buffer + * should not be freed while the returned information is in use. + */ +/* + *struct nfs4_string { + * unsigned int len; + * char *data; + *}; // size [variable] + * NOTE: Returned string points to inside the XDR buffer + */ +static __be32 * +__read_u8_opaque(__be32 *p, struct nfs4_string *str) +{ + str->len = be32_to_cpup(p++); + str->data = (char *)p; + + p += XDR_QUADLEN(str->len); + return p; +} + +/* + * struct pnfs_osd_targetid { + * u32 oti_type; + * struct nfs4_string oti_scsi_device_id; + * };// size 4 + [variable] + */ +static __be32 * +__read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid) +{ + u32 oti_type; + + oti_type = be32_to_cpup(p++); + targetid->oti_type = oti_type; + + switch (oti_type) { + case OBJ_TARGET_SCSI_NAME: + case OBJ_TARGET_SCSI_DEVICE_ID: + p = __read_u8_opaque(p, &targetid->oti_scsi_device_id); + } + + return p; +} + +/* + * struct pnfs_osd_net_addr { + * struct nfs4_string r_netid; + * struct nfs4_string r_addr; + * }; + */ +static __be32 * +__read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr) +{ + p = __read_u8_opaque(p, &netaddr->r_netid); + p = __read_u8_opaque(p, &netaddr->r_addr); + + return p; +} + +/* + * struct pnfs_osd_targetaddr { + * u32 ota_available; + * struct pnfs_osd_net_addr ota_netaddr; + * }; + */ +static __be32 * +__read_targetaddr(__be32 *p, struct 
pnfs_osd_targetaddr *targetaddr) +{ + u32 ota_available; + + ota_available = be32_to_cpup(p++); + targetaddr->ota_available = ota_available; + + if (ota_available) + p = __read_net_addr(p, &targetaddr->ota_netaddr); + + + return p; +} + +/* + * struct pnfs_osd_deviceaddr { + * struct pnfs_osd_targetid oda_targetid; + * struct pnfs_osd_targetaddr oda_targetaddr; + * u8 oda_lun[8]; + * struct nfs4_string oda_systemid; + * struct pnfs_osd_object_cred oda_root_obj_cred; + * struct nfs4_string oda_osdname; + * }; + */ + +/* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does + * not have an xdr_stream + */ +static __be32 * +__read_opaque_cred(__be32 *p, + struct pnfs_osd_opaque_cred *opaque_cred) +{ + opaque_cred->cred_len = be32_to_cpu(*p++); + opaque_cred->cred = p; + return p + XDR_QUADLEN(opaque_cred->cred_len); +} + +static __be32 * +__read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp) +{ + p = _osd_xdr_decode_objid(p, &comp->oc_object_id); + comp->oc_osd_version = be32_to_cpup(p++); + comp->oc_cap_key_sec = be32_to_cpup(p++); + + p = __read_opaque_cred(p, &comp->oc_cap_key); + p = __read_opaque_cred(p, &comp->oc_cap); + return p; +} + +void pnfs_osd_xdr_decode_deviceaddr( + struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p) +{ + p = __read_targetid(p, &deviceaddr->oda_targetid); + + p = __read_targetaddr(p, &deviceaddr->oda_targetaddr); + + p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun, + sizeof(deviceaddr->oda_lun)); + + p = __read_u8_opaque(p, &deviceaddr->oda_systemid); + + p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred); + + p = __read_u8_opaque(p, &deviceaddr->oda_osdname); + + /* libosd likes this terminated in dbg. 
It's last, so no problems */ + deviceaddr->oda_osdname.data[deviceaddr->oda_osdname.len] = 0; +} + +/* + * struct pnfs_osd_layoutupdate { + * u32 dsu_valid; + * s64 dsu_delta; + * u32 olu_ioerr_flag; + * }; xdr size 4 + 8 + 4 + */ +int +pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, + struct pnfs_osd_layoutupdate *lou) +{ + __be32 *p = xdr_reserve_space(xdr, 4 + 8 + 4); + + if (!p) + return -E2BIG; + + *p++ = cpu_to_be32(lou->dsu_valid); + if (lou->dsu_valid) + p = xdr_encode_hyper(p, lou->dsu_delta); + *p++ = cpu_to_be32(lou->olu_ioerr_flag); + return 0; +} + +/* + * struct pnfs_osd_objid { + * struct nfs4_deviceid oid_device_id; + * u64 oid_partition_id; + * u64 oid_object_id; + * }; // xdr size 32 bytes + */ +static inline __be32 * +pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id) +{ + p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data, + sizeof(object_id->oid_device_id.data)); + p = xdr_encode_hyper(p, object_id->oid_partition_id); + p = xdr_encode_hyper(p, object_id->oid_object_id); + + return p; +} + +/* + * struct pnfs_osd_ioerr { + * struct pnfs_osd_objid oer_component; + * u64 oer_comp_offset; + * u64 oer_comp_length; + * u32 oer_iswrite; + * u32 oer_errno; + * }; // xdr size 32 + 24 bytes + */ +void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr) +{ + p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component); + p = xdr_encode_hyper(p, ioerr->oer_comp_offset); + p = xdr_encode_hyper(p, ioerr->oer_comp_length); + *p++ = cpu_to_be32(ioerr->oer_iswrite); + *p = cpu_to_be32(ioerr->oer_errno); +} + +__be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 32 + 24); + if (unlikely(!p)) + dprintk("%s: out of xdr space\n", __func__); + + return p; +} diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index c80add6e221..7913961aff2 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -204,6 +204,21 @@ nfs_wait_on_request(struct nfs_page *req) 
TASK_UNINTERRUPTIBLE); } +static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req) +{ + /* + * FIXME: ideally we should be able to coalesce all requests + * that are not block boundary aligned, but currently this + * is problematic for the case of bsize < PAGE_CACHE_SIZE, + * since nfs_flush_multi and nfs_pagein_multi assume you + * can have only one struct nfs_page. + */ + if (desc->pg_bsize < PAGE_SIZE) + return 0; + + return desc->pg_count + req->wb_bytes <= desc->pg_bsize; +} + /** * nfs_pageio_init - initialise a page io descriptor * @desc: pointer to descriptor @@ -229,6 +244,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; + desc->pg_test = nfs_generic_pg_test; + pnfs_pageio_init(desc, inode); } /** @@ -242,29 +259,23 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, * * Return 'true' if this is the case, else return 'false'. */ -static int nfs_can_coalesce_requests(struct nfs_page *prev, - struct nfs_page *req, - struct nfs_pageio_descriptor *pgio) +static bool nfs_can_coalesce_requests(struct nfs_page *prev, + struct nfs_page *req, + struct nfs_pageio_descriptor *pgio) { if (req->wb_context->cred != prev->wb_context->cred) - return 0; + return false; if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) - return 0; + return false; if (req->wb_context->state != prev->wb_context->state) - return 0; + return false; if (req->wb_index != (prev->wb_index + 1)) - return 0; + return false; if (req->wb_pgbase != 0) - return 0; + return false; if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) - return 0; - /* - * Non-whole file layouts need to check that req is inside of - * pgio->pg_lseg. 
- */ - if (pgio->pg_test && !pgio->pg_test(pgio, prev, req)) - return 0; - return 1; + return false; + return pgio->pg_test(pgio, prev, req); } /** @@ -278,31 +289,18 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev, static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { - size_t newlen = req->wb_bytes; - if (desc->pg_count != 0) { struct nfs_page *prev; - /* - * FIXME: ideally we should be able to coalesce all requests - * that are not block boundary aligned, but currently this - * is problematic for the case of bsize < PAGE_CACHE_SIZE, - * since nfs_flush_multi and nfs_pagein_multi assume you - * can have only one struct nfs_page. - */ - if (desc->pg_bsize < PAGE_SIZE) - return 0; - newlen += desc->pg_count; - if (newlen > desc->pg_bsize) - return 0; prev = nfs_list_entry(desc->pg_list.prev); if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; - } else + } else { desc->pg_base = req->wb_pgbase; + } nfs_list_remove_request(req); nfs_list_add_request(req, &desc->pg_list); - desc->pg_count = newlen; + desc->pg_count += req->wb_bytes; return 1; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index f57f5281a52..8c1309d852a 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -177,13 +177,28 @@ get_layout_hdr(struct pnfs_layout_hdr *lo) atomic_inc(&lo->plh_refcount); } +static struct pnfs_layout_hdr * +pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; + return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) : + kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); +} + +static void +pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; + return ld->alloc_layout_hdr ? 
ld->free_layout_hdr(lo) : kfree(lo); +} + static void destroy_layout_hdr(struct pnfs_layout_hdr *lo) { dprintk("%s: freeing layout cache %p\n", __func__, lo); BUG_ON(!list_empty(&lo->plh_layouts)); NFS_I(lo->plh_inode)->layout = NULL; - kfree(lo); + pnfs_free_layout_hdr(lo); } static void @@ -228,7 +243,7 @@ put_lseg_common(struct pnfs_layout_segment *lseg) { struct inode *inode = lseg->pls_layout->plh_inode; - BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); + WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); list_del_init(&lseg->pls_list); if (list_empty(&lseg->pls_layout->plh_segs)) { set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); @@ -261,11 +276,72 @@ put_lseg(struct pnfs_layout_segment *lseg) } EXPORT_SYMBOL_GPL(put_lseg); +static inline u64 +end_offset(u64 start, u64 len) +{ + u64 end; + + end = start + len; + return end >= start ? end : NFS4_MAX_UINT64; +} + +/* last octet in a range */ +static inline u64 +last_byte_offset(u64 start, u64 len) +{ + u64 end; + + BUG_ON(!len); + end = start + len; + return end > start ? end - 1 : NFS4_MAX_UINT64; +} + +/* + * is l2 fully contained in l1? + * start1 end1 + * [----------------------------------) + * start2 end2 + * [----------------) + */ +static inline int +lo_seg_contained(struct pnfs_layout_range *l1, + struct pnfs_layout_range *l2) +{ + u64 start1 = l1->offset; + u64 end1 = end_offset(start1, l1->length); + u64 start2 = l2->offset; + u64 end2 = end_offset(start2, l2->length); + + return (start1 <= start2) && (end1 >= end2); +} + +/* + * is l1 and l2 intersecting? 
+ * start1 end1 + * [----------------------------------) + * start2 end2 + * [----------------) + */ +static inline int +lo_seg_intersecting(struct pnfs_layout_range *l1, + struct pnfs_layout_range *l2) +{ + u64 start1 = l1->offset; + u64 end1 = end_offset(start1, l1->length); + u64 start2 = l2->offset; + u64 end2 = end_offset(start2, l2->length); + + return (end1 == NFS4_MAX_UINT64 || end1 > start2) && + (end2 == NFS4_MAX_UINT64 || end2 > start1); +} + static bool -should_free_lseg(u32 lseg_iomode, u32 recall_iomode) +should_free_lseg(struct pnfs_layout_range *lseg_range, + struct pnfs_layout_range *recall_range) { - return (recall_iomode == IOMODE_ANY || - lseg_iomode == recall_iomode); + return (recall_range->iomode == IOMODE_ANY || + lseg_range->iomode == recall_range->iomode) && + lo_seg_intersecting(lseg_range, recall_range); } /* Returns 1 if lseg is removed from list, 0 otherwise */ @@ -296,7 +372,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg, int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, - u32 iomode) + struct pnfs_layout_range *recall_range) { struct pnfs_layout_segment *lseg, *next; int invalid = 0, removed = 0; @@ -309,7 +385,8 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, return 0; } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) - if (should_free_lseg(lseg->pls_range.iomode, iomode)) { + if (!recall_range || + should_free_lseg(&lseg->pls_range, recall_range)) { dprintk("%s: freeing lseg %p iomode %d " "offset %llu length %llu\n", __func__, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, @@ -358,7 +435,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) lo = nfsi->layout; if (lo) { lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ - mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); + mark_matching_lsegs_invalid(lo, &tmp_list, NULL); } spin_unlock(&nfsi->vfs_inode.i_lock); pnfs_free_lseg_list(&tmp_list); @@ -467,7 +544,7 @@ 
pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, static struct pnfs_layout_segment * send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_open_context *ctx, - u32 iomode, + struct pnfs_layout_range *range, gfp_t gfp_flags) { struct inode *ino = lo->plh_inode; @@ -499,11 +576,11 @@ send_layoutget(struct pnfs_layout_hdr *lo, goto out_err_free; } - lgp->args.minlength = NFS4_MAX_UINT64; + lgp->args.minlength = PAGE_CACHE_SIZE; + if (lgp->args.minlength > range->length) + lgp->args.minlength = range->length; lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; - lgp->args.range.iomode = iomode; - lgp->args.range.offset = 0; - lgp->args.range.length = NFS4_MAX_UINT64; + lgp->args.range = *range; lgp->args.type = server->pnfs_curr_ld->id; lgp->args.inode = ino; lgp->args.ctx = get_nfs_open_context(ctx); @@ -518,7 +595,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, nfs4_proc_layoutget(lgp); if (!lseg) { /* remember that LAYOUTGET failed and suspend trying */ - set_bit(lo_fail_bit(iomode), &lo->plh_flags); + set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); } /* free xdr pages */ @@ -542,6 +619,51 @@ out_err_free: return NULL; } +/* Initiates a LAYOUTRETURN(FILE) */ +int +_pnfs_return_layout(struct inode *ino) +{ + struct pnfs_layout_hdr *lo = NULL; + struct nfs_inode *nfsi = NFS_I(ino); + LIST_HEAD(tmp_list); + struct nfs4_layoutreturn *lrp; + nfs4_stateid stateid; + int status = 0; + + dprintk("--> %s\n", __func__); + + spin_lock(&ino->i_lock); + lo = nfsi->layout; + if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) { + spin_unlock(&ino->i_lock); + dprintk("%s: no layout segments to return\n", __func__); + goto out; + } + stateid = nfsi->layout->plh_stateid; + /* Reference matched in nfs4_layoutreturn_release */ + get_layout_hdr(lo); + spin_unlock(&ino->i_lock); + pnfs_free_lseg_list(&tmp_list); + + WARN_ON(test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)); + + lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); + if (unlikely(lrp == NULL)) { + status 
= -ENOMEM; + goto out; + } + + lrp->args.stateid = stateid; + lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; + lrp->args.inode = ino; + lrp->clp = NFS_SERVER(ino)->nfs_client; + + status = nfs4_proc_layoutreturn(lrp); +out: + dprintk("<-- %s status: %d\n", __func__, status); + return status; +} + bool pnfs_roc(struct inode *ino) { struct pnfs_layout_hdr *lo; @@ -625,10 +747,23 @@ bool pnfs_roc_drain(struct inode *ino, u32 *barrier) * are seen first. */ static s64 -cmp_layout(u32 iomode1, u32 iomode2) +cmp_layout(struct pnfs_layout_range *l1, + struct pnfs_layout_range *l2) { + s64 d; + + /* high offset > low offset */ + d = l1->offset - l2->offset; + if (d) + return d; + + /* short length > long length */ + d = l2->length - l1->length; + if (d) + return d; + /* read > read/write */ - return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ); + return (int)(l1->iomode == IOMODE_READ) - (int)(l2->iomode == IOMODE_READ); } static void @@ -636,13 +771,12 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg) { struct pnfs_layout_segment *lp; - int found = 0; dprintk("%s:Begin\n", __func__); assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lp, &lo->plh_segs, pls_list) { - if (cmp_layout(lp->pls_range.iomode, lseg->pls_range.iomode) > 0) + if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) continue; list_add_tail(&lseg->pls_list, &lp->pls_list); dprintk("%s: inserted lseg %p " @@ -652,16 +786,14 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, lseg->pls_range.offset, lseg->pls_range.length, lp, lp->pls_range.iomode, lp->pls_range.offset, lp->pls_range.length); - found = 1; - break; - } - if (!found) { - list_add_tail(&lseg->pls_list, &lo->plh_segs); - dprintk("%s: inserted lseg %p " - "iomode %d offset %llu length %llu at tail\n", - __func__, lseg, lseg->pls_range.iomode, - lseg->pls_range.offset, lseg->pls_range.length); + goto out; } + list_add_tail(&lseg->pls_list, &lo->plh_segs); + dprintk("%s: 
inserted lseg %p " + "iomode %d offset %llu length %llu at tail\n", + __func__, lseg, lseg->pls_range.iomode, + lseg->pls_range.offset, lseg->pls_range.length); +out: get_layout_hdr(lo); dprintk("%s:Return\n", __func__); @@ -672,7 +804,7 @@ alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) { struct pnfs_layout_hdr *lo; - lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); + lo = pnfs_alloc_layout_hdr(ino, gfp_flags); if (!lo) return NULL; atomic_set(&lo->plh_refcount, 1); @@ -705,7 +837,7 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) if (likely(nfsi->layout == NULL)) /* Won the race? */ nfsi->layout = new; else - kfree(new); + pnfs_free_layout_hdr(new); return nfsi->layout; } @@ -721,16 +853,28 @@ pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) * READ RW true */ static int -is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) +is_matching_lseg(struct pnfs_layout_range *ls_range, + struct pnfs_layout_range *range) { - return (iomode != IOMODE_RW || lseg->pls_range.iomode == IOMODE_RW); + struct pnfs_layout_range range1; + + if ((range->iomode == IOMODE_RW && + ls_range->iomode != IOMODE_RW) || + !lo_seg_intersecting(ls_range, range)) + return 0; + + /* range1 covers only the first byte in the range */ + range1 = *range; + range1.length = 1; + return lo_seg_contained(ls_range, &range1); } /* * lookup range in layout */ static struct pnfs_layout_segment * -pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) +pnfs_find_lseg(struct pnfs_layout_hdr *lo, + struct pnfs_layout_range *range) { struct pnfs_layout_segment *lseg, *ret = NULL; @@ -739,11 +883,11 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) assert_spin_locked(&lo->plh_inode->i_lock); list_for_each_entry(lseg, &lo->plh_segs, pls_list) { if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && - is_matching_lseg(lseg, iomode)) { + is_matching_lseg(&lseg->pls_range, range)) { ret = get_lseg(lseg); break; } - if (cmp_layout(iomode, lseg->pls_range.iomode) > 
0) + if (cmp_layout(range, &lseg->pls_range) > 0) break; } @@ -759,9 +903,17 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, + loff_t pos, + u64 count, enum pnfs_iomode iomode, gfp_t gfp_flags) { + struct pnfs_layout_range arg = { + .iomode = iomode, + .offset = pos, + .length = count, + }; + unsigned pg_offset; struct nfs_inode *nfsi = NFS_I(ino); struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; struct pnfs_layout_hdr *lo; @@ -789,7 +941,7 @@ pnfs_update_layout(struct inode *ino, goto out_unlock; /* Check to see if the layout for the given range already exists */ - lseg = pnfs_find_lseg(lo, iomode); + lseg = pnfs_find_lseg(lo, &arg); if (lseg) goto out_unlock; @@ -811,7 +963,14 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&clp->cl_lock); } - lseg = send_layoutget(lo, ctx, iomode, gfp_flags); + pg_offset = arg.offset & ~PAGE_CACHE_MASK; + if (pg_offset) { + arg.offset -= pg_offset; + arg.length += pg_offset; + } + arg.length = PAGE_CACHE_ALIGN(arg.length); + + lseg = send_layoutget(lo, ctx, &arg, gfp_flags); if (!lseg && first) { spin_lock(&clp->cl_lock); list_del_init(&lo->plh_layouts); @@ -838,17 +997,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; int status = 0; - /* Verify we got what we asked for. - * Note that because the xdr parsing only accepts a single - * element array, this can fail even if the server is behaving - * correctly. 
- */ - if (lgp->args.range.iomode > res->range.iomode || - res->range.offset != 0 || - res->range.length != NFS4_MAX_UINT64) { - status = -EINVAL; - goto out; - } /* Inject layout blob into I/O device driver */ lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); if (!lseg || IS_ERR(lseg)) { @@ -895,51 +1043,64 @@ out_forget_reply: goto out; } -static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, - struct nfs_page *prev, - struct nfs_page *req) +bool +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) { + enum pnfs_iomode access_type; + gfp_t gfp_flags; + + /* We assume that pg_ioflags == 0 iff we're reading a page */ + if (pgio->pg_ioflags == 0) { + access_type = IOMODE_READ; + gfp_flags = GFP_KERNEL; + } else { + access_type = IOMODE_RW; + gfp_flags = GFP_NOFS; + } + if (pgio->pg_count == prev->wb_bytes) { /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - IOMODE_READ, - GFP_KERNEL); + req_offset(req), + pgio->pg_count, + access_type, + gfp_flags); + return true; } - return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); -} -void -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) -{ - struct pnfs_layoutdriver_type *ld; + if (pgio->pg_lseg && + req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset, + pgio->pg_lseg->pls_range.length)) + return false; - ld = NFS_SERVER(inode)->pnfs_curr_ld; - pgio->pg_test = (ld && ld->pg_test) ? 
pnfs_read_pg_test : NULL; + return true; } +EXPORT_SYMBOL_GPL(pnfs_generic_pg_test); -static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, - struct nfs_page *prev, - struct nfs_page *req) +/* + * Called by non rpc-based layout drivers + */ +int +pnfs_ld_write_done(struct nfs_write_data *data) { - if (pgio->pg_count == prev->wb_bytes) { - /* This is first coelesce call for a series of nfs_pages */ - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - prev->wb_context, - IOMODE_RW, - GFP_NOFS); - } - return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); -} + int status; -void -pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode) -{ - struct pnfs_layoutdriver_type *ld; + if (!data->pnfs_error) { + pnfs_set_layoutcommit(data); + data->mds_ops->rpc_call_done(&data->task, data); + data->mds_ops->rpc_release(data); + return 0; + } - ld = NFS_SERVER(inode)->pnfs_curr_ld; - pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL; + dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, + data->pnfs_error); + status = nfs_initiate_write(data, NFS_CLIENT(data->inode), + data->mds_ops, NFS_FILE_SYNC); + return status ? : -EAGAIN; } +EXPORT_SYMBOL_GPL(pnfs_ld_write_done); enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *wdata, @@ -966,6 +1127,29 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, } /* + * Called by non rpc-based layout drivers + */ +int +pnfs_ld_read_done(struct nfs_read_data *data) +{ + int status; + + if (!data->pnfs_error) { + __nfs4_read_done_cb(data); + data->mds_ops->rpc_call_done(&data->task, data); + data->mds_ops->rpc_release(data); + return 0; + } + + dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__, + data->pnfs_error); + status = nfs_initiate_read(data, NFS_CLIENT(data->inode), + data->mds_ops); + return status ? : -EAGAIN; +} +EXPORT_SYMBOL_GPL(pnfs_ld_read_done); + +/* * Call the appropriate parallel I/O subsystem read function. 
*/ enum pnfs_try_status @@ -1009,7 +1193,7 @@ void pnfs_set_layoutcommit(struct nfs_write_data *wdata) { struct nfs_inode *nfsi = NFS_I(wdata->inode); - loff_t end_pos = wdata->args.offset + wdata->res.count; + loff_t end_pos = wdata->mds_offset + wdata->res.count; bool mark_as_dirty = false; spin_lock(&nfsi->vfs_inode.i_lock); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 0c015bad9e7..48d0a8e4d06 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -30,6 +30,7 @@ #ifndef FS_NFS_PNFS_H #define FS_NFS_PNFS_H +#include <linux/nfs_fs.h> #include <linux/nfs_page.h> enum { @@ -64,17 +65,29 @@ enum { NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */ }; +enum layoutdriver_policy_flags { + /* Should the pNFS client commit and return the layout upon a setattr */ + PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, +}; + +struct nfs4_deviceid_node; + /* Per-layout driver specific registration structure */ struct pnfs_layoutdriver_type { struct list_head pnfs_tblid; const u32 id; const char *name; struct module *owner; + unsigned flags; + + struct pnfs_layout_hdr * (*alloc_layout_hdr) (struct inode *inode, gfp_t gfp_flags); + void (*free_layout_hdr) (struct pnfs_layout_hdr *); + struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); void (*free_lseg) (struct pnfs_layout_segment *lseg); /* test for nfs page cache coalescing */ - int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); /* Returns true if layoutdriver wants to divert this request to * driver's commit routine. 
@@ -89,6 +102,16 @@ struct pnfs_layoutdriver_type { */ enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data); enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how); + + void (*free_deviceid_node) (struct nfs4_deviceid_node *); + + void (*encode_layoutreturn) (struct pnfs_layout_hdr *layoutid, + struct xdr_stream *xdr, + const struct nfs4_layoutreturn_args *args); + + void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, + struct xdr_stream *xdr, + const struct nfs4_layoutcommit_args *args); }; struct pnfs_layout_hdr { @@ -120,21 +143,22 @@ extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev); extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); +extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type, gfp_t gfp_flags); + loff_t pos, u64 count, enum pnfs_iomode access_type, + gfp_t gfp_flags); void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, const struct rpc_call_ops *, int); enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, const struct rpc_call_ops *); -void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); -void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *); +bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); @@ -148,13 +172,37 @@ int 
pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct nfs4_state *open_state); int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, struct list_head *tmp_list, - u32 iomode); + struct pnfs_layout_range *recall_range); bool pnfs_roc(struct inode *ino); void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier); void pnfs_set_layoutcommit(struct nfs_write_data *wdata); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); +int _pnfs_return_layout(struct inode *); +int pnfs_ld_write_done(struct nfs_write_data *); +int pnfs_ld_read_done(struct nfs_read_data *); + +/* pnfs_dev.c */ +struct nfs4_deviceid_node { + struct hlist_node node; + const struct pnfs_layoutdriver_type *ld; + const struct nfs_client *nfs_client; + struct nfs4_deviceid deviceid; + atomic_t ref; +}; + +void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); +struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); +struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); +void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); +void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, + const struct pnfs_layoutdriver_type *, + const struct nfs_client *, + const struct nfs4_deviceid *); +struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); +bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); +void nfs4_deviceid_purge_client(const struct nfs_client *); static inline int lo_fail_bit(u32 iomode) { @@ -223,6 +271,36 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) put_lseg(req->wb_commit_lseg); } +/* Should the pNFS client commit and return the layout upon a setattr */ +static inline bool 
+pnfs_ld_layoutret_on_setattr(struct inode *inode) +{ + if (!pnfs_enabled_sb(NFS_SERVER(inode))) + return false; + return NFS_SERVER(inode)->pnfs_curr_ld->flags & + PNFS_LAYOUTRET_ON_SETATTR; +} + +static inline int pnfs_return_layout(struct inode *ino) +{ + struct nfs_inode *nfsi = NFS_I(ino); + struct nfs_server *nfss = NFS_SERVER(ino); + + if (pnfs_enabled_sb(nfss) && nfsi->layout) + return _pnfs_return_layout(ino); + + return 0; +} + +static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, + struct inode *inode) +{ + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; + + if (ld) + pgio->pg_test = ld->pg_test; +} + #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -245,7 +323,8 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) static inline struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type, gfp_t gfp_flags) + loff_t pos, u64 count, enum pnfs_iomode access_type, + gfp_t gfp_flags) { return NULL; } @@ -264,6 +343,17 @@ pnfs_try_to_write_data(struct nfs_write_data *data, return PNFS_NOT_ATTEMPTED; } +static inline int pnfs_return_layout(struct inode *ino) +{ + return 0; +} + +static inline bool +pnfs_ld_layoutret_on_setattr(struct inode *inode) +{ + return false; +} + static inline bool pnfs_roc(struct inode *ino) { @@ -294,16 +384,9 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline void -pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino) -{ - pgio->pg_test = NULL; -} - -static inline void -pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino) +static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, + struct inode *inode) { - pgio->pg_test = NULL; } static inline void @@ -331,6 +414,10 @@ static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync) { return 0; } + +static inline 
void nfs4_deviceid_purge_client(struct nfs_client *ncl) +{ +} #endif /* CONFIG_NFS_V4_1 */ #endif /* FS_NFS_PNFS_H */ diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c new file mode 100644 index 00000000000..c65e133ce9c --- /dev/null +++ b/fs/nfs/pnfs_dev.c @@ -0,0 +1,270 @@ +/* + * Device operations for the pnfs client. + * + * Copyright (c) 2002 + * The Regents of the University of Michigan + * All Rights Reserved + * + * Dean Hildebrand <dhildebz@umich.edu> + * Garth Goodson <Garth.Goodson@netapp.com> + * + * Permission is granted to use, copy, create derivative works, and + * redistribute this software and such derivative works for any purpose, + * so long as the name of the University of Michigan is not used in + * any advertising or publicity pertaining to the use or distribution + * of this software without specific, written prior authorization. If + * the above copyright notice or any other identification of the + * University of Michigan is included in any copy of any portion of + * this software, then the disclaimer below must also be included. + * + * This software is provided as is, without representation or warranty + * of any kind either express or implied, including without limitation + * the implied warranties of merchantability, fitness for a particular + * purpose, or noninfringement. The Regents of the University of + * Michigan shall not be liable for any damages, including special, + * indirect, incidental, or consequential damages, with respect to any + * claim arising out of or in connection with the use of the software, + * even if it has been or is hereafter advised of the possibility of + * such damages. + */ + +#include "pnfs.h" + +#define NFSDBG_FACILITY NFSDBG_PNFS + +/* + * Device ID RCU cache. A device ID is unique per server and layout type. 
+ */ +#define NFS4_DEVICE_ID_HASH_BITS 5 +#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) +#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) + +static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; +static DEFINE_SPINLOCK(nfs4_deviceid_lock); + +void +nfs4_print_deviceid(const struct nfs4_deviceid *id) +{ + u32 *p = (u32 *)id; + + dprintk("%s: device id= [%x%x%x%x]\n", __func__, + p[0], p[1], p[2], p[3]); +} +EXPORT_SYMBOL_GPL(nfs4_print_deviceid); + +static inline u32 +nfs4_deviceid_hash(const struct nfs4_deviceid *id) +{ + unsigned char *cptr = (unsigned char *)id->data; + unsigned int nbytes = NFS4_DEVICEID4_SIZE; + u32 x = 0; + + while (nbytes--) { + x *= 37; + x += *cptr++; + } + return x & NFS4_DEVICE_ID_HASH_MASK; +} + +static struct nfs4_deviceid_node * +_lookup_deviceid(const struct pnfs_layoutdriver_type *ld, + const struct nfs_client *clp, const struct nfs4_deviceid *id, + long hash) +{ + struct nfs4_deviceid_node *d; + struct hlist_node *n; + + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) + if (d->ld == ld && d->nfs_client == clp && + !memcmp(&d->deviceid, id, sizeof(*id))) { + if (atomic_read(&d->ref)) + return d; + else + continue; + } + return NULL; +} + +/* + * Lookup a deviceid in cache and get a reference count on it if found + * + * @clp nfs_client associated with deviceid + * @id deviceid to look up + */ +struct nfs4_deviceid_node * +_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, + const struct nfs_client *clp, const struct nfs4_deviceid *id, + long hash) +{ + struct nfs4_deviceid_node *d; + + rcu_read_lock(); + d = _lookup_deviceid(ld, clp, id, hash); + if (d && !atomic_inc_not_zero(&d->ref)) + d = NULL; + rcu_read_unlock(); + return d; +} + +struct nfs4_deviceid_node * +nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, + const struct nfs_client *clp, const struct nfs4_deviceid *id) +{ + return _find_get_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); 
+} +EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); + +/* + * Unhash and put deviceid + * + * @clp nfs_client associated with deviceid + * @id the deviceid to unhash + * + * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. + */ +struct nfs4_deviceid_node * +nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, + const struct nfs_client *clp, const struct nfs4_deviceid *id) +{ + struct nfs4_deviceid_node *d; + + spin_lock(&nfs4_deviceid_lock); + rcu_read_lock(); + d = _lookup_deviceid(ld, clp, id, nfs4_deviceid_hash(id)); + rcu_read_unlock(); + if (!d) { + spin_unlock(&nfs4_deviceid_lock); + return NULL; + } + hlist_del_init_rcu(&d->node); + spin_unlock(&nfs4_deviceid_lock); + synchronize_rcu(); + + /* balance the initial ref set in pnfs_insert_deviceid */ + if (atomic_dec_and_test(&d->ref)) + return d; + + return NULL; +} +EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); + +/* + * Delete a deviceid from cache + * + * @clp struct nfs_client qualifying the deviceid + * @id deviceid to delete + */ +void +nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, + const struct nfs_client *clp, const struct nfs4_deviceid *id) +{ + struct nfs4_deviceid_node *d; + + d = nfs4_unhash_put_deviceid(ld, clp, id); + if (!d) + return; + d->ld->free_deviceid_node(d); +} +EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); + +void +nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, + const struct pnfs_layoutdriver_type *ld, + const struct nfs_client *nfs_client, + const struct nfs4_deviceid *id) +{ + INIT_HLIST_NODE(&d->node); + d->ld = ld; + d->nfs_client = nfs_client; + d->deviceid = *id; + atomic_set(&d->ref, 1); +} +EXPORT_SYMBOL_GPL(nfs4_init_deviceid_node); + +/* + * Uniquely initialize and insert a deviceid node into cache + * + * @new new deviceid node + * Note that the caller must set up the following members: + * new->ld + * new->nfs_client + * new->deviceid + * + * @ret the inserted node, if none found, otherwise, the found entry. 
+ */ +struct nfs4_deviceid_node * +nfs4_insert_deviceid_node(struct nfs4_deviceid_node *new) +{ + struct nfs4_deviceid_node *d; + long hash; + + spin_lock(&nfs4_deviceid_lock); + hash = nfs4_deviceid_hash(&new->deviceid); + d = _find_get_deviceid(new->ld, new->nfs_client, &new->deviceid, hash); + if (d) { + spin_unlock(&nfs4_deviceid_lock); + return d; + } + + hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); + spin_unlock(&nfs4_deviceid_lock); + + return new; +} +EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); + +/* + * Dereference a deviceid node and delete it when its reference count drops + * to zero. + * + * @d deviceid node to put + * + * @ret true iff the node was deleted + */ +bool +nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) +{ + if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) + return false; + hlist_del_init_rcu(&d->node); + spin_unlock(&nfs4_deviceid_lock); + synchronize_rcu(); + d->ld->free_deviceid_node(d); + return true; +} +EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node); + +static void +_deviceid_purge_client(const struct nfs_client *clp, long hash) +{ + struct nfs4_deviceid_node *d; + struct hlist_node *n, *next; + HLIST_HEAD(tmp); + + rcu_read_lock(); + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node) + if (d->nfs_client == clp && atomic_read(&d->ref)) { + hlist_del_init_rcu(&d->node); + hlist_add_head(&d->node, &tmp); + } + rcu_read_unlock(); + + if (hlist_empty(&tmp)) + return; + + synchronize_rcu(); + hlist_for_each_entry_safe(d, n, next, &tmp, node) + if (atomic_dec_and_test(&d->ref)) + d->ld->free_deviceid_node(d); +} + +void +nfs4_deviceid_purge_client(const struct nfs_client *clp) +{ + long h; + + spin_lock(&nfs4_deviceid_lock); + for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) + _deviceid_purge_client(clp, h); + spin_unlock(&nfs4_deviceid_lock); +} diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2bcf0dc306a..20a7f952e24 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -288,7 +288,9 @@ static int 
nfs_pagein_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg != NULL); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, + req_offset(req), desc->pg_count, + IOMODE_READ, GFP_KERNEL); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -351,7 +353,9 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, + req_offset(req), desc->pg_count, + IOMODE_READ, GFP_KERNEL); ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 0, lseg); @@ -660,7 +664,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - pnfs_pageio_init_read(&pgio, inode); if (rsize < PAGE_CACHE_SIZE) nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); else diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e288f06d3fa..ce40e5c568b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -63,6 +63,7 @@ #include "iostat.h" #include "internal.h" #include "fscache.h" +#include "pnfs.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -732,6 +733,28 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } +#ifdef CONFIG_NFS_V4_1 +void show_sessions(struct seq_file *m, struct nfs_server *server) +{ + if (nfs4_has_session(server->nfs_client)) + seq_printf(m, ",sessions"); +} +#else +void show_sessions(struct seq_file *m, struct nfs_server *server) {} +#endif + +#ifdef CONFIG_NFS_V4_1 +void show_pnfs(struct seq_file *m, struct nfs_server *server) +{ + seq_printf(m, ",pnfs="); + if (server->pnfs_curr_ld) + seq_printf(m, "%s", server->pnfs_curr_ld->name); + else + seq_printf(m, "not configured"); +} 
+#else /* CONFIG_NFS_V4_1 */ +void show_pnfs(struct seq_file *m, struct nfs_server *server) {} +#endif /* CONFIG_NFS_V4_1 */ static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt) { @@ -792,6 +815,8 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); + show_sessions(m, nfss); + show_pnfs(m, nfss); } #endif diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 49c715b4ac9..e268e3b2349 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -939,7 +939,9 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, + req_offset(req), desc->pg_count, + IOMODE_RW, GFP_NOFS); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -1013,7 +1015,9 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, + req_offset(req), desc->pg_count, + IOMODE_RW, GFP_NOFS); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) @@ -1032,8 +1036,6 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, { size_t wsize = NFS_SERVER(inode)->wsize; - pnfs_pageio_init_write(pgio, inode); - if (wsize < PAGE_CACHE_SIZE) nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); else diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index ad000aeb21a..b9566e46219 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1354,12 +1354,6 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp) if 
(IS_ERR(exp)) return nfserrno(PTR_ERR(exp)); rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL); - if (rv) - goto out; - rv = check_nfsd_access(exp, rqstp); - if (rv) - fh_put(fhp); -out: exp_put(exp); return rv; } diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 2247fc91d5e..9095f3c21df 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -245,7 +245,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, } /* Now create the file and set attributes */ - nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len, + nfserr = do_nfsd_create(rqstp, dirfhp, argp->name, argp->len, attr, newfhp, argp->createmode, argp->verf, NULL, NULL); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index ad48faca20f..08c6e36ab2e 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -842,7 +842,7 @@ out: return rv; } -__be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) +static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) { struct svc_fh fh; int err; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 5fcb1396a7e..3a6dbd70b34 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -196,9 +196,9 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o /* * Note: create modes (UNCHECKED,GUARDED...) are the same - * in NFSv4 as in v3. + * in NFSv4 as in v3 except EXCLUSIVE4_1. 
*/ - status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data, + status = do_nfsd_create(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &open->op_iattr, &resfh, open->op_createmode, (u32 *)open->op_verf.data, @@ -403,7 +403,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen); - return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP); + return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_BYPASS_GSS); } static __be32 @@ -762,6 +762,9 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 err; fh_init(&resfh, NFS4_FHSIZE); + err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; err = nfsd_lookup_dentry(rqstp, &cstate->current_fh, secinfo->si_name, secinfo->si_namelen, &exp, &dentry); @@ -986,6 +989,9 @@ enum nfsd4_op_flags { ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ + /* For rfc 5661 section 2.6.3.1.1: */ + OP_HANDLES_WRONGSEC = 1 << 3, + OP_IS_PUTFH_LIKE = 1 << 4, }; struct nfsd4_operation { @@ -1031,6 +1037,44 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) return nfs_ok; } +static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) +{ + return &nfsd4_ops[op->opnum]; +} + +static bool need_wrongsec_check(struct svc_rqst *rqstp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + struct nfsd4_compoundargs *argp = rqstp->rq_argp; + struct nfsd4_op *this = &argp->ops[resp->opcnt - 1]; + struct nfsd4_op *next = &argp->ops[resp->opcnt]; + struct nfsd4_operation *thisd; + struct nfsd4_operation *nextd; + + thisd = OPDESC(this); + /* + * Most ops check wronsec on our own; only the putfh-like ops + * have special rules. 
+ */ + if (!(thisd->op_flags & OP_IS_PUTFH_LIKE)) + return false; + /* + * rfc 5661 2.6.3.1.1.6: don't bother erroring out a + * put-filehandle operation if we're not going to use the + * result: + */ + if (argp->opcnt == resp->opcnt) + return false; + + nextd = OPDESC(next); + /* + * Rest of 2.6.3.1.1: certain operations will return WRONGSEC + * errors themselves as necessary; others should check for them + * now: + */ + return !(nextd->op_flags & OP_HANDLES_WRONGSEC); +} + /* * COMPOUND call. */ @@ -1108,7 +1152,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, goto encode_op; } - opdesc = &nfsd4_ops[op->opnum]; + opdesc = OPDESC(op); if (!cstate->current_fh.fh_dentry) { if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { @@ -1126,6 +1170,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, else BUG_ON(op->status == nfs_ok); + if (!op->status && need_wrongsec_check(rqstp)) + op->status = check_nfsd_access(cstate->current_fh.fh_export, rqstp); + encode_op: /* Only from SEQUENCE */ if (resp->cstate.status == nfserr_replay_cache) { @@ -1217,10 +1264,12 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_LOOKUP] = { .op_func = (nfsd4op_func)nfsd4_lookup, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_LOOKUP", }, [OP_LOOKUPP] = { .op_func = (nfsd4op_func)nfsd4_lookupp, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_LOOKUPP", }, [OP_NVERIFY] = { @@ -1229,6 +1278,7 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_OPEN] = { .op_func = (nfsd4op_func)nfsd4_open, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_OPEN", }, [OP_OPEN_CONFIRM] = { @@ -1241,17 +1291,20 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_PUTFH] = { .op_func = (nfsd4op_func)nfsd4_putfh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE, .op_name = "OP_PUTFH", }, [OP_PUTPUBFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = 
ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE, .op_name = "OP_PUTPUBFH", }, [OP_PUTROOTFH] = { .op_func = (nfsd4op_func)nfsd4_putrootfh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE, .op_name = "OP_PUTROOTFH", }, [OP_READ] = { @@ -1281,15 +1334,18 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_RESTOREFH] = { .op_func = (nfsd4op_func)nfsd4_restorefh, - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS + | OP_IS_PUTFH_LIKE, .op_name = "OP_RESTOREFH", }, [OP_SAVEFH] = { .op_func = (nfsd4op_func)nfsd4_savefh, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SAVEFH", }, [OP_SECINFO] = { .op_func = (nfsd4op_func)nfsd4_secinfo, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO", }, [OP_SETATTR] = { @@ -1353,6 +1409,7 @@ static struct nfsd4_operation nfsd4_ops[] = { }, [OP_SECINFO_NO_NAME] = { .op_func = (nfsd4op_func)nfsd4_secinfo_no_name, + .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", }, }; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 4cf04e11c66..e98f3c2e949 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1519,6 +1519,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, bool confirm_me = false; int status = 0; + if (cr_ses->flags & ~SESSION4_FLAG_MASK_A) + return nfserr_inval; + nfs4_lock_state(); unconf = find_unconfirmed_client(&cr_ses->clientid); conf = find_confirmed_client(&cr_ses->clientid); @@ -1637,8 +1640,9 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, return nfserr_badsession; status = nfsd4_map_bcts_dir(&bcts->dir); - nfsd4_new_conn(rqstp, cstate->session, bcts->dir); - return nfs_ok; + if (!status) + nfsd4_new_conn(rqstp, cstate->session, bcts->dir); + return status; } static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid) @@ -1725,6 +1729,13 @@ static void 
nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi return; } +static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session) +{ + struct nfsd4_compoundargs *args = rqstp->rq_argp; + + return args->opcnt > session->se_fchannel.maxops; +} + __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1753,6 +1764,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (!session) goto out; + status = nfserr_too_many_ops; + if (nfsd4_session_too_many_ops(rqstp, session)) + goto out; + status = nfserr_badslot; if (seq->slotid >= session->se_fchannel.maxreqs) goto out; @@ -1808,6 +1823,8 @@ out: __be32 nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc) { + int status = 0; + if (rc->rca_one_fs) { if (!cstate->current_fh.fh_dentry) return nfserr_nofilehandle; @@ -1817,9 +1834,14 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta */ return nfs_ok; } + nfs4_lock_state(); - if (is_client_expired(cstate->session->se_client)) { - nfs4_unlock_state(); + status = nfserr_complete_already; + if (cstate->session->se_client->cl_firststate) + goto out; + + status = nfserr_stale_clientid; + if (is_client_expired(cstate->session->se_client)) /* * The following error isn't really legal. * But we only get here if the client just explicitly @@ -1827,11 +1849,13 @@ nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta * error it gets back on an operation for the dead * client. 
*/ - return nfserr_stale_clientid; - } + goto out; + + status = nfs_ok; nfsd4_create_clid_dir(cstate->session->se_client); +out: nfs4_unlock_state(); - return nfs_ok; + return status; } __be32 @@ -2462,7 +2486,7 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid) return NULL; } -int share_access_to_flags(u32 share_access) +static int share_access_to_flags(u32 share_access) { share_access &= ~NFS4_SHARE_WANT_MASK; @@ -2882,7 +2906,7 @@ out: return status; } -struct lock_manager nfsd4_manager = { +static struct lock_manager nfsd4_manager = { }; static void diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c6766af00d9..99018110321 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -424,15 +424,12 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts) { DECODE_HEAD; - u32 dummy; READ_BUF(NFS4_MAX_SESSIONID_LEN + 8); COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN); READ32(bcts->dir); - /* XXX: Perhaps Tom Tucker could help us figure out how we - * should be using ctsa_use_conn_in_rdma_mode: */ - READ32(dummy); - + /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker + * could help us figure out we should be using it. 
*/ DECODE_TAIL; } @@ -588,8 +585,6 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) READ_BUF(lockt->lt_owner.len); READMEM(lockt->lt_owner.data, lockt->lt_owner.len); - if (argp->minorversion && !zero_clientid(&lockt->lt_clientid)) - return nfserr_inval; DECODE_TAIL; } @@ -3120,7 +3115,7 @@ nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, return nfserr; } -__be32 +static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_sequence *seq) { diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 55c8e63af0b..90c6aa6d5e0 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -344,7 +344,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) * which clients virtually always use auth_sys for, * even while using RPCSEC_GSS for NFS. */ - if (access & NFSD_MAY_LOCK) + if (access & NFSD_MAY_LOCK || access & NFSD_MAY_BYPASS_GSS) goto skip_pseudoflavor_check; /* * Clients may expect to be able to use auth_sys during mount, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 129f3c9f62d..d5718273bb3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -181,16 +181,10 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp, struct svc_export *exp; struct dentry *dparent; struct dentry *dentry; - __be32 err; int host_err; dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); - /* Obtain dentry and export. 
*/ - err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); - if (err) - return err; - dparent = fhp->fh_dentry; exp = fhp->fh_export; exp_get(exp); @@ -254,6 +248,9 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, struct dentry *dentry; __be32 err; + err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC); + if (err) + return err; err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry); if (err) return err; @@ -877,13 +874,11 @@ static __be32 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { - struct inode *inode; mm_segment_t oldfs; __be32 err; int host_err; err = nfserr_perm; - inode = file->f_path.dentry->d_inode; if (file->f_op->splice_read && rqstp->rq_splice_ok) { struct splice_desc sd = { @@ -1340,11 +1335,18 @@ out_nfserr: } #ifdef CONFIG_NFSD_V3 + +static inline int nfsd_create_is_exclusive(int createmode) +{ + return createmode == NFS3_CREATE_EXCLUSIVE + || createmode == NFS4_CREATE_EXCLUSIVE4_1; +} + /* - * NFSv3 version of nfsd_create + * NFSv3 and NFSv4 version of nfsd_create */ __be32 -nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, +do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, struct iattr *iap, struct svc_fh *resfhp, int createmode, u32 *verifier, int *truncp, int *created) @@ -1396,7 +1398,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, if (err) goto out; - if (createmode == NFS3_CREATE_EXCLUSIVE) { + if (nfsd_create_is_exclusive(createmode)) { /* solaris7 gets confused (bugid 4218508) if these have * the high bit set, so just clear the high bits. 
If this is * ever changed to use different attrs for storing the @@ -1437,6 +1439,11 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, && dchild->d_inode->i_atime.tv_sec == v_atime && dchild->d_inode->i_size == 0 ) break; + case NFS4_CREATE_EXCLUSIVE4_1: + if ( dchild->d_inode->i_mtime.tv_sec == v_mtime + && dchild->d_inode->i_atime.tv_sec == v_atime + && dchild->d_inode->i_size == 0 ) + goto set_attr; /* fallthru */ case NFS3_CREATE_GUARDED: err = nfserr_exist; @@ -1455,7 +1462,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_check_ignore_resizing(iap); - if (createmode == NFS3_CREATE_EXCLUSIVE) { + if (nfsd_create_is_exclusive(createmode)) { /* Cram the verifier into atime/mtime */ iap->ia_valid = ATTR_MTIME|ATTR_ATIME | ATTR_MTIME_SET|ATTR_ATIME_SET; @@ -2034,7 +2041,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, struct inode *inode = dentry->d_inode; int err; - if (acc == NFSD_MAY_NOP) + if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP) return 0; #if 0 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 9a370a5e36b..e0bbac04d1d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -17,10 +17,14 @@ #define NFSD_MAY_SATTR 8 #define NFSD_MAY_TRUNC 16 #define NFSD_MAY_LOCK 32 +#define NFSD_MAY_MASK 63 + +/* extra hints to permission and open routines: */ #define NFSD_MAY_OWNER_OVERRIDE 64 #define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/ #define NFSD_MAY_BYPASS_GSS_ON_ROOT 256 #define NFSD_MAY_NOT_BREAK_LEASE 512 +#define NFSD_MAY_BYPASS_GSS 1024 #define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE) #define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC) @@ -54,7 +58,7 @@ __be32 nfsd_create(struct svc_rqst *, struct svc_fh *, int type, dev_t rdev, struct svc_fh *res); #ifdef CONFIG_NFSD_V3 __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); -__be32 nfsd_create_v3(struct svc_rqst *, struct 
svc_fh *, +__be32 do_nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, struct svc_fh *res, int createmode, u32 *verifier, int *truncp, int *created); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 587f1843283..b954878ad6c 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -917,7 +917,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) * construction. This function can be called both as a single operation * and as a part of indivisible file operations. */ -void nilfs_dirty_inode(struct inode *inode) +void nilfs_dirty_inode(struct inode *inode, int flags) { struct nilfs_transaction_info ti; struct nilfs_mdt_info *mdi = NILFS_MDT(inode); diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 1102a5fbb74..546849b3e88 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -334,8 +334,6 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) struct nilfs_transaction_info ti; int err; - dentry_unhash(dentry); - err = nilfs_transaction_begin(dir->i_sb, &ti, 0); if (err) return err; @@ -371,9 +369,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct nilfs_transaction_info ti; int err; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - err = nilfs_transaction_begin(old_dir->i_sb, &ti, 1); if (unlikely(err)) return err; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index a9c6a531f80..f02b9ad43a2 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -269,7 +269,7 @@ int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); extern int nilfs_inode_dirty(struct inode *); int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); extern int nilfs_mark_inode_dirty(struct inode *); -extern void nilfs_dirty_inode(struct inode *); +extern void nilfs_dirty_inode(struct inode *, int flags); int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); diff --git a/fs/omfs/dir.c 
b/fs/omfs/dir.c index c368360c35a..3b8d3979e03 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -241,11 +241,9 @@ static int omfs_remove(struct inode *dir, struct dentry *dentry) int ret; - if (S_ISDIR(inode->i_mode)) { - dentry_unhash(dentry); - if (!omfs_dir_is_empty(inode)) - return -ENOTEMPTY; - } + if (S_ISDIR(inode->i_mode) && + !omfs_dir_is_empty(inode)) + return -ENOTEMPTY; ret = omfs_delete_entry(dentry); if (ret) @@ -382,9 +380,6 @@ static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry, int err; if (new_inode) { - if (S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - /* overwriting existing file/dir */ err = omfs_remove(new_dir, new_dentry); if (err) diff --git a/fs/proc/base.c b/fs/proc/base.c index 4ede550517a..14def991d9d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -83,6 +83,9 @@ #include <linux/pid_namespace.h> #include <linux/fs_struct.h> #include <linux/slab.h> +#ifdef CONFIG_HARDWALL +#include <asm/hardwall.h> +#endif #include "internal.h" /* NOTE: @@ -2842,6 +2845,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_TASK_IO_ACCOUNTING INF("io", S_IRUGO, proc_tgid_io_accounting), #endif +#ifdef CONFIG_HARDWALL + INF("hardwall", S_IRUGO, proc_pid_hardwall), +#endif }; static int proc_tgid_base_readdir(struct file * filp, @@ -3181,6 +3187,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_TASK_IO_ACCOUNTING INF("io", S_IRUGO, proc_tid_io_accounting), #endif +#ifdef CONFIG_HARDWALL + INF("hardwall", S_IRUGO, proc_pid_hardwall), +#endif }; static int proc_tid_base_readdir(struct file * filp, diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 76c8164d565..118662690cd 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -831,8 +831,6 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry) INITIALIZE_PATH(path); struct reiserfs_dir_entry de; - dentry_unhash(dentry); - /* we will be doing 2 balancings and update 2 stat data, we change quotas * of the 
owner of the directory and of the owner of the parent directory. * The quota structure is possibly deleted only on last iput => outside @@ -1227,9 +1225,6 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry, unsigned long savelink = 1; struct timespec ctime; - if (new_dentry->d_inode && S_ISDIR(new_dentry->d_inode->i_mode)) - dentry_unhash(new_dentry); - /* three balancings: (1) old name removal, (2) new name insertion and (3) maybe "save" link insertion stat data updates: (1) old directory, diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b216ff6be1c..aa91089162c 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -568,7 +568,7 @@ static void destroy_inodecache(void) } /* we don't mark inodes dirty, we just log them */ -static void reiserfs_dirty_inode(struct inode *inode) +static void reiserfs_dirty_inode(struct inode *inode, int flags) { struct reiserfs_transaction_handle th; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 50f1abccd1c..e8a62f41b45 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -98,7 +98,6 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry) reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex, I_MUTEX_CHILD, dir->i_sb); - dentry_unhash(dentry); error = dir->i_op->rmdir(dir, dentry); if (!error) dentry->d_inode->i_flags |= S_DEAD; diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index 730c56248c9..5e1101ff276 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c @@ -147,7 +147,7 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, * table[0] points to the first inode lookup table metadata block, * this should be less than lookup_table_start */ - if (!IS_ERR(table) && table[0] >= lookup_table_start) { + if (!IS_ERR(table) && le64_to_cpu(table[0]) >= lookup_table_start) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/fragment.c b/fs/squashfs/fragment.c index 1516a6490bf..0ed6edbc5c7 100644 --- 
a/fs/squashfs/fragment.c +++ b/fs/squashfs/fragment.c @@ -90,7 +90,7 @@ __le64 *squashfs_read_fragment_index_table(struct super_block *sb, * table[0] points to the first fragment table metadata block, this * should be less than fragment_table_start */ - if (!IS_ERR(table) && table[0] >= fragment_table_start) { + if (!IS_ERR(table) && le64_to_cpu(table[0]) >= fragment_table_start) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index a70858e0fb4..d38ea3dab95 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c @@ -93,7 +93,7 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb, * table[0] points to the first id lookup table metadata block, this * should be less than id_table_start */ - if (!IS_ERR(table) && table[0] >= id_table_start) { + if (!IS_ERR(table) && le64_to_cpu(table[0]) >= id_table_start) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 6f26abee359..7438850c62d 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -245,7 +245,7 @@ allocate_id_index_table: msblk->id_table = NULL; goto failed_mount; } - next_table = msblk->id_table[0]; + next_table = le64_to_cpu(msblk->id_table[0]); /* Handle inode lookup table */ lookup_table_start = le64_to_cpu(sblk->lookup_table_start); @@ -261,7 +261,7 @@ allocate_id_index_table: msblk->inode_lookup_table = NULL; goto failed_mount; } - next_table = msblk->inode_lookup_table[0]; + next_table = le64_to_cpu(msblk->inode_lookup_table[0]); sb->s_export_op = &squashfs_export_ops; @@ -286,7 +286,7 @@ handle_fragments: msblk->fragment_index = NULL; goto failed_mount; } - next_table = msblk->fragment_index[0]; + next_table = le64_to_cpu(msblk->fragment_index[0]); check_directory_table: /* Sanity check directory_table */ diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index e2cc6756f3b..e474fbcf8bd 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -196,8 +196,6 @@ static int sysv_rmdir(struct inode * dir, 
struct dentry * dentry) struct inode *inode = dentry->d_inode; int err = -ENOTEMPTY; - dentry_unhash(dentry); - if (sysv_empty_dir(inode)) { err = sysv_unlink(dir, dentry); if (!err) { @@ -224,9 +222,6 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry, struct sysv_dir_entry * old_de; int err = -ENOENT; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - old_de = sysv_find_entry(old_dentry, &old_page); if (!old_de) goto out; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index c2b80943560..ef5abd38f0b 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -656,8 +656,6 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 }; - dentry_unhash(dentry); - /* * Budget request settings: deletion direntry, deletion inode and * changing the parent inode. If budgeting fails, go ahead anyway @@ -978,9 +976,6 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; struct timespec time; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - /* * Budget request settings: deletion direntry, new direntry, removing * the old inode, and changing old and new parent directory inodes. 
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 166951e0dcd..3be645e012c 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -581,6 +581,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) ubifs_assert(wbuf->size % c->min_io_size == 0); ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); ubifs_assert(!c->ro_media && !c->ro_mount); + ubifs_assert(!c->space_fixup); if (c->leb_size - wbuf->offs >= c->max_write_size) ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); @@ -759,6 +760,7 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); ubifs_assert(!c->ro_media && !c->ro_mount); + ubifs_assert(!c->space_fixup); if (c->ro_error) return -EROFS; diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 34b1679e6e3..cef0460f4c5 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -669,6 +669,7 @@ out_free: out_release: release_head(c, BASEHD); + kfree(dent); out_ro: ubifs_ro_mode(c, err); if (last_reference) diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index bd644bf587a..a5422fffbd6 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -674,7 +674,7 @@ static int kill_orphans(struct ubifs_info *c) if (IS_ERR(sleb)) { if (PTR_ERR(sleb) == -EUCLEAN) sleb = ubifs_recover_leb(c, lnum, 0, - c->sbuf, 0); + c->sbuf, -1); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 731d9e2e7b5..783d8e0beb7 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -564,19 +564,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, } /** - * drop_last_node - drop the last node or group of nodes. + * drop_last_group - drop the last group of nodes. 
* @sleb: scanned LEB information * @offs: offset of dropped nodes is returned here - * @grouped: non-zero if whole group of nodes have to be dropped * * This is a helper function for 'ubifs_recover_leb()' which drops the last - * node of the scanned LEB or the last group of nodes if @grouped is not zero. - * This function returns %1 if a node was dropped and %0 otherwise. + * group of nodes of the scanned LEB. */ -static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) +static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) { - int dropped = 0; - while (!list_empty(&sleb->nodes)) { struct ubifs_scan_node *snod; struct ubifs_ch *ch; @@ -585,17 +581,40 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) list); ch = snod->node; if (ch->group_type != UBIFS_IN_NODE_GROUP) - return dropped; - dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); + break; + + dbg_rcvry("dropping grouped node at %d:%d", + sleb->lnum, snod->offs); + *offs = snod->offs; + list_del(&snod->list); + kfree(snod); + sleb->nodes_cnt -= 1; + } +} + +/** + * drop_last_node - drop the last node. + * @sleb: scanned LEB information + * @offs: offset of dropped nodes is returned here + * @grouped: non-zero if whole group of nodes have to be dropped + * + * This is a helper function for 'ubifs_recover_leb()' which drops the last + * node of the scanned LEB. 
+ */ +static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) +{ + struct ubifs_scan_node *snod; + + if (!list_empty(&sleb->nodes)) { + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, + list); + + dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs); *offs = snod->offs; list_del(&snod->list); kfree(snod); sleb->nodes_cnt -= 1; - dropped = 1; - if (!grouped) - break; } - return dropped; } /** @@ -604,7 +623,8 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) * @lnum: LEB number * @offs: offset * @sbuf: LEB-sized buffer to use - * @grouped: nodes may be grouped for recovery + * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not + * belong to any journal head) * * This function does a scan of a LEB, but caters for errors that might have * been caused by the unclean unmount from which we are attempting to recover. @@ -612,13 +632,14 @@ static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) * found, and a negative error code in case of failure. */ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, - int offs, void *sbuf, int grouped) + int offs, void *sbuf, int jhead) { int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; + int grouped = jhead == -1 ? 
0 : c->jheads[jhead].grouped; struct ubifs_scan_leb *sleb; void *buf = sbuf + offs; - dbg_rcvry("%d:%d", lnum, offs); + dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped); sleb = ubifs_start_scan(c, lnum, offs, sbuf); if (IS_ERR(sleb)) @@ -635,7 +656,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * Scan quietly until there is an error from which we cannot * recover */ - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); if (ret == SCANNED_A_NODE) { /* A valid node, and not a padding node */ struct ubifs_ch *ch = buf; @@ -695,59 +716,62 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * If nodes are grouped, always drop the incomplete group at * the end. */ - drop_last_node(sleb, &offs, 1); + drop_last_group(sleb, &offs); - /* - * While we are in the middle of the same min. I/O unit keep dropping - * nodes. So basically, what we want is to make sure that the last min. - * I/O unit where we saw the corruption is dropped completely with all - * the uncorrupted node which may possibly sit there. - * - * In other words, let's name the min. I/O unit where the corruption - * starts B, and the previous min. I/O unit A. The below code tries to - * deal with a situation when half of B contains valid nodes or the end - * of a valid node, and the second half of B contains corrupted data or - * garbage. This means that UBIFS had been writing to B just before the - * power cut happened. I do not know how realistic is this scenario - * that half of the min. I/O unit had been written successfully and the - * other half not, but this is possible in our 'failure mode emulation' - * infrastructure at least. - * - * So what is the problem, why we need to drop those nodes? Whey can't - * we just clean-up the second half of B by putting a padding node - * there? 
We can, and this works fine with one exception which was - * reproduced with power cut emulation testing and happens extremely - * rarely. The description follows, but it is worth noting that that is - * only about the GC head, so we could do this trick only if the bud - * belongs to the GC head, but it does not seem to be worth an - * additional "if" statement. - * - * So, imagine the file-system is full, we run GC which is moving valid - * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head - * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X - * and will try to continue. Imagine that LEB X is currently the - * dirtiest LEB, and the amount of used space in LEB Y is exactly the - * same as amount of free space in LEB X. - * - * And a power cut happens when nodes are moved from LEB X to LEB Y. We - * are here trying to recover LEB Y which is the GC head LEB. We find - * the min. I/O unit B as described above. Then we clean-up LEB Y by - * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function - * fails, because it cannot find a dirty LEB which could be GC'd into - * LEB Y! Even LEB X does not match because the amount of valid nodes - * there does not fit the free space in LEB Y any more! And this is - * because of the padding node which we added to LEB Y. The - * user-visible effect of this which I once observed and analysed is - * that we cannot mount the file-system with -ENOSPC error. - * - * So obviously, to make sure that situation does not happen we should - * free min. I/O unit B in LEB Y completely and the last used min. I/O - * unit in LEB Y should be A. This is basically what the below code - * tries to do. - */ - while (min_io_unit == round_down(offs, c->min_io_size) && - min_io_unit != offs && - drop_last_node(sleb, &offs, grouped)); + if (jhead == GCHD) { + /* + * If this LEB belongs to the GC head then while we are in the + * middle of the same min. I/O unit keep dropping nodes. 
So + * basically, what we want is to make sure that the last min. + * I/O unit where we saw the corruption is dropped completely + * with all the uncorrupted nodes which may possibly sit there. + * + * In other words, let's name the min. I/O unit where the + * corruption starts B, and the previous min. I/O unit A. The + * below code tries to deal with a situation when half of B + * contains valid nodes or the end of a valid node, and the + * second half of B contains corrupted data or garbage. This + * means that UBIFS had been writing to B just before the power + * cut happened. I do not know how realistic is this scenario + * that half of the min. I/O unit had been written successfully + * and the other half not, but this is possible in our 'failure + * mode emulation' infrastructure at least. + * + * So what is the problem, why we need to drop those nodes? Why + * can't we just clean-up the second half of B by putting a + * padding node there? We can, and this works fine with one + * exception which was reproduced with power cut emulation + * testing and happens extremely rarely. + * + * Imagine the file-system is full, we run GC which starts + * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is + * the current GC head LEB). The @c->gc_lnum is -1, which means + * that GC will retain LEB X and will try to continue. Imagine + * that LEB X is currently the dirtiest LEB, and the amount of + * used space in LEB Y is exactly the same as amount of free + * space in LEB X. + * + * And a power cut happens when nodes are moved from LEB X to + * LEB Y. We are here trying to recover LEB Y which is the GC + * head LEB. We find the min. I/O unit B as described above. + * Then we clean-up LEB Y by padding min. I/O unit. And later + * 'ubifs_rcvry_gc_commit()' function fails, because it cannot + * find a dirty LEB which could be GC'd into LEB Y! Even LEB X + * does not match because the amount of valid nodes there does + * not fit the free space in LEB Y any more! 
And this is + * because of the padding node which we added to LEB Y. The + * user-visible effect of this which I once observed and + * analysed is that we cannot mount the file-system with + * -ENOSPC error. + * + * So obviously, to make sure that situation does not happen we + * should free min. I/O unit B in LEB Y completely and the last + * used min. I/O unit in LEB Y should be A. This is basically + * what the below code tries to do. + */ + while (offs > min_io_unit) + drop_last_node(sleb, &offs); + } buf = sbuf + offs; len = c->leb_size - offs; @@ -881,7 +905,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, } ubifs_scan_destroy(sleb); } - return ubifs_recover_leb(c, lnum, offs, sbuf, 0); + return ubifs_recover_leb(c, lnum, offs, sbuf, -1); } /** diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 6617280d167..5e97161ce4d 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -557,8 +557,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) * these LEBs could possibly be written to at the power cut * time. */ - sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, - b->bud->jhead != GCHD); + sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); else sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); if (IS_ERR(sleb)) diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 46961c00323..9e1d05666fe 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -277,13 +277,18 @@ static int kick_a_thread(void) return 0; } -int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) +int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc) { + int nr = sc->nr_to_scan; int freed, contention = 0; long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); if (nr == 0) - return clean_zn_cnt; + /* + * Due to the way UBIFS updates the clean znode counter it may + * temporarily be negative. + */ + return clean_zn_cnt >= 0 ? 
clean_zn_cnt : 1; if (!clean_zn_cnt) { /* diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 6db0bdaa9f7..b5aeb5a8ebe 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -382,7 +382,7 @@ done: end_writeback(inode); } -static void ubifs_dirty_inode(struct inode *inode) +static void ubifs_dirty_inode(struct inode *inode, int flags) { struct ubifs_inode *ui = ubifs_inode(inode); @@ -811,15 +811,18 @@ static int alloc_wbufs(struct ubifs_info *c) c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; c->jheads[i].wbuf.jhead = i; + c->jheads[i].grouped = 1; } c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; /* * Garbage Collector head likely contains long-term data and - * does not need to be synchronized by timer. + * does not need to be synchronized by timer. Also GC head nodes are + * not grouped. */ c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; c->jheads[GCHD].wbuf.no_timer = 1; + c->jheads[GCHD].grouped = 0; return 0; } @@ -1284,12 +1287,25 @@ static int mount_ubifs(struct ubifs_info *c) if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { ubifs_msg("recovery needed"); c->need_recovery = 1; - if (!c->ro_mount) { - err = ubifs_recover_inl_heads(c, c->sbuf); - if (err) - goto out_master; - } - } else if (!c->ro_mount) { + } + + if (c->need_recovery && !c->ro_mount) { + err = ubifs_recover_inl_heads(c, c->sbuf); + if (err) + goto out_master; + } + + err = ubifs_lpt_init(c, 1, !c->ro_mount); + if (err) + goto out_master; + + if (!c->ro_mount && c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + goto out_master; + } + + if (!c->ro_mount) { /* * Set the "dirty" flag so that if we reboot uncleanly we * will notice this immediately on the next mount. 
@@ -1297,13 +1313,9 @@ static int mount_ubifs(struct ubifs_info *c) c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); err = ubifs_write_master(c); if (err) - goto out_master; + goto out_lpt; } - err = ubifs_lpt_init(c, 1, !c->ro_mount); - if (err) - goto out_lpt; - err = dbg_check_idx_size(c, c->bi.old_idx_sz); if (err) goto out_lpt; @@ -1396,12 +1408,6 @@ static int mount_ubifs(struct ubifs_info *c) } else ubifs_assert(c->lst.taken_empty_lebs > 0); - if (!c->ro_mount && c->space_fixup) { - err = ubifs_fixup_free_space(c); - if (err) - goto out_infos; - } - err = dbg_check_filesystem(c); if (err) goto out_infos; diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 8119b1fd8d9..91b4213dde8 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -2876,12 +2876,13 @@ static void tnc_destroy_cnext(struct ubifs_info *c) */ void ubifs_tnc_close(struct ubifs_info *c) { - long clean_freed; - tnc_destroy_cnext(c); if (c->zroot.znode) { - clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); - atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); + long n; + + ubifs_destroy_tnc_subtree(c->zroot.znode); + n = atomic_long_read(&c->clean_zn_cnt); + atomic_long_sub(n, &ubifs_clean_zn_cnt); } kfree(c->gap_lebs); kfree(c->ilebs); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 93d1412a06f..f79983d6f86 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -722,12 +722,14 @@ struct ubifs_bud { * struct ubifs_jhead - journal head. * @wbuf: head's write-buffer * @buds_list: list of bud LEBs belonging to this journal head + * @grouped: non-zero if UBIFS groups nodes when writing to this journal head * * Note, the @buds list is protected by the @c->buds_lock. 
*/ struct ubifs_jhead { struct ubifs_wbuf wbuf; struct list_head buds_list; + unsigned int grouped:1; }; /** @@ -1614,7 +1616,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); int ubifs_tnc_end_commit(struct ubifs_info *c); /* shrinker.c */ -int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); +int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc); /* commit.c */ int ubifs_bg_thread(void *info); @@ -1742,7 +1744,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum); int ubifs_recover_master_node(struct ubifs_info *c); int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, - int offs, void *sbuf, int grouped); + int offs, void *sbuf, int jhead); struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf); int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 4d76594c2a8..f1dce848ef9 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -783,8 +783,6 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) struct fileIdentDesc *fi, cfi; struct kernel_lb_addr tloc; - dentry_unhash(dentry); - retval = -ENOENT; fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); if (!fi) @@ -1083,9 +1081,6 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, struct kernel_lb_addr tloc; struct udf_inode_info *old_iinfo = UDF_I(old_inode); - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); if (ofi) { if (ofibh.sbh != ofibh.ebh) diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 953ebdfc5bf..29309e25417 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -258,8 +258,6 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err= -ENOTEMPTY; - 
dentry_unhash(dentry); - lock_ufs(dir->i_sb); if (ufs_empty_dir (inode)) { err = ufs_unlink(dir, dentry); @@ -284,9 +282,6 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ufs_dir_entry *old_de; int err = -ENOENT; - if (new_inode && S_ISDIR(new_inode->i_mode)) - dentry_unhash(new_dentry); - old_de = ufs_find_entry(old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; diff --git a/fs/xattr.c b/fs/xattr.c index f1ef94974de..f060663ab70 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -46,18 +46,22 @@ xattr_permission(struct inode *inode, const char *name, int mask) return 0; /* - * The trusted.* namespace can only be accessed by a privileged user. + * The trusted.* namespace can only be accessed by privileged users. */ - if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) - return (capable(CAP_SYS_ADMIN) ? 0 : -EPERM); + if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) { + if (!capable(CAP_SYS_ADMIN)) + return (mask & MAY_WRITE) ? -EPERM : -ENODATA; + return 0; + } - /* In user.* namespace, only regular files and directories can have + /* + * In the user.* namespace, only regular files and directories can have * extended attributes. For sticky directories, only the owner and - * privileged user can write attributes. + * privileged users can write attributes. */ if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) { if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode)) - return -EPERM; + return (mask & MAY_WRITE) ? 
-EPERM : -ENODATA; if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && (mask & MAY_WRITE) && !inode_owner_or_capable(inode)) return -EPERM; @@ -87,7 +91,11 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, { struct inode *inode = dentry->d_inode; int error = -EOPNOTSUPP; + int issec = !strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN); + if (issec) + inode->i_flags &= ~S_NOSEC; if (inode->i_op->setxattr) { error = inode->i_op->setxattr(dentry, name, value, size, flags); if (!error) { @@ -95,8 +103,7 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name, security_inode_post_setxattr(dentry, name, value, size, flags); } - } else if (!strncmp(name, XATTR_SECURITY_PREFIX, - XATTR_SECURITY_PREFIX_LEN)) { + } else if (issec) { const char *suffix = name + XATTR_SECURITY_PREFIX_LEN; error = security_inode_setsecurity(inode, suffix, value, size, flags); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 98b9c91fcdf..1e3a7ce804d 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -925,7 +925,8 @@ xfs_fs_inode_init_once( */ STATIC void xfs_fs_dirty_inode( - struct inode *inode) + struct inode *inode, + int flags) { barrier(); XFS_I(inode)->i_update_core = 1; diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index a3252a5ead6..a756bc8d866 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -98,6 +98,9 @@ acpi_os_table_override(struct acpi_table_header *existing_table, /* * Spinlock primitives */ +acpi_status +acpi_os_create_lock(acpi_spinlock *out_handle); + void acpi_os_delete_lock(acpi_spinlock handle); acpi_cpu_flags acpi_os_acquire_lock(acpi_spinlock handle); diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index f6ad63d25b7..2ed0a8486c1 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -47,7 +47,7 @@ /* Current ACPICA subsystem version in YYYYMMDD format */ -#define ACPI_CA_VERSION 0x20110316 +#define 
ACPI_CA_VERSION 0x20110413 #include "actypes.h" #include "actbl.h" diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 64f838beaab..b67231bef63 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -501,8 +501,9 @@ typedef u64 acpi_integer; #define ACPI_STATE_D1 (u8) 1 #define ACPI_STATE_D2 (u8) 2 #define ACPI_STATE_D3 (u8) 3 -#define ACPI_D_STATES_MAX ACPI_STATE_D3 -#define ACPI_D_STATE_COUNT 4 +#define ACPI_STATE_D3_COLD (u8) 4 +#define ACPI_D_STATES_MAX ACPI_STATE_D3_COLD +#define ACPI_D_STATE_COUNT 5 #define ACPI_STATE_C0 (u8) 0 #define ACPI_STATE_C1 (u8) 1 @@ -712,8 +713,24 @@ typedef u8 acpi_adr_space_type; #define ACPI_ADR_SPACE_CMOS (acpi_adr_space_type) 5 #define ACPI_ADR_SPACE_PCI_BAR_TARGET (acpi_adr_space_type) 6 #define ACPI_ADR_SPACE_IPMI (acpi_adr_space_type) 7 -#define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 8 -#define ACPI_ADR_SPACE_FIXED_HARDWARE (acpi_adr_space_type) 127 + +#define ACPI_NUM_PREDEFINED_REGIONS 8 + +/* + * Special Address Spaces + * + * Note: A Data Table region is a special type of operation region + * that has its own AML opcode. However, internally, the AML + * interpreter simply creates an operation region with an an address + * space type of ACPI_ADR_SPACE_DATA_TABLE. 
+ */ +#define ACPI_ADR_SPACE_DATA_TABLE (acpi_adr_space_type) 0x7E /* Internal to ACPICA only */ +#define ACPI_ADR_SPACE_FIXED_HARDWARE (acpi_adr_space_type) 0x7F + +/* Values for _REG connection code */ + +#define ACPI_REG_DISCONNECT 0 +#define ACPI_REG_CONNECT 1 /* * bit_register IDs diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 55192ac0ced..ba4928cae47 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -310,14 +310,7 @@ static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) /* in processor_core.c */ void acpi_processor_set_pdc(acpi_handle handle); -#ifdef CONFIG_SMP int acpi_get_cpuid(acpi_handle, int type, u32 acpi_id); -#else -static inline int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) -{ - return -1; -} -#endif /* in processor_throttling.c */ int acpi_processor_tstate_has_changed(struct acpi_processor *pr); diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index ff5c66080c8..fcdcb5d5c99 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -35,9 +35,9 @@ * platform data and other tables. 
*/ -static inline int gpio_is_valid(int number) +static inline bool gpio_is_valid(int number) { - return ((unsigned)number) < ARCH_NR_GPIOS; + return number >= 0 && number < ARCH_NR_GPIOS; } struct device; @@ -193,8 +193,8 @@ struct gpio { }; extern int gpio_request_one(unsigned gpio, unsigned long flags, const char *label); -extern int gpio_request_array(struct gpio *array, size_t num); -extern void gpio_free_array(struct gpio *array, size_t num); +extern int gpio_request_array(const struct gpio *array, size_t num); +extern void gpio_free_array(const struct gpio *array, size_t num); #ifdef CONFIG_GPIO_SYSFS @@ -212,7 +212,7 @@ extern void gpio_unexport(unsigned gpio); #else /* !CONFIG_GPIOLIB */ -static inline int gpio_is_valid(int number) +static inline bool gpio_is_valid(int number) { /* only non-negative numbers are valid */ return number >= 0; diff --git a/include/asm-generic/unistd.h b/include/asm-generic/unistd.h index 33d52470488..4f76959397f 100644 --- a/include/asm-generic/unistd.h +++ b/include/asm-generic/unistd.h @@ -681,9 +681,13 @@ __SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \ __SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime) #define __NR_syncfs 267 __SYSCALL(__NR_syncfs, sys_syncfs) +#define __NR_setns 268 +__SYSCALL(__NR_setns, sys_setns) +#define __NR_sendmmsg 269 +__SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg) #undef __NR_syscalls -#define __NR_syscalls 268 +#define __NR_syscalls 270 /* * All syscalls below here should go away really, diff --git a/include/linux/acpi.h b/include/linux/acpi.h index a2e910e0129..1deb2a73c2d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -150,8 +150,7 @@ extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); extern int ec_transaction(u8 command, const u8 *wdata, unsigned wdata_len, - u8 *rdata, unsigned rdata_len, - int force_poll); + u8 *rdata, unsigned rdata_len); #if defined(CONFIG_ACPI_WMI) || defined(CONFIG_ACPI_WMI_MODULE) 
diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 96c038e43d6..ee456c79b0e 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -34,4 +34,17 @@ static inline int atomic_inc_not_zero_hint(atomic_t *v, int hint) } #endif +#ifndef CONFIG_ARCH_HAS_ATOMIC_OR +static inline void atomic_or(int i, atomic_t *v) +{ + int old; + int new; + + do { + old = atomic_read(v); + new = old | i; + } while (atomic_cmpxchg(v, old, new) != old); +} +#endif /* #ifndef CONFIG_ARCH_HAS_ATOMIC_OR */ + #endif /* _LINUX_ATOMIC_H */ diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index f20eb8f1602..e9eaec52265 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -146,7 +146,7 @@ static inline void cpuset_cpus_allowed(struct task_struct *p, static inline int cpuset_cpus_allowed_fallback(struct task_struct *p) { - cpumask_copy(&p->cpus_allowed, cpu_possible_mask); + do_set_cpus_allowed(p, cpu_possible_mask); return cpumask_any(cpu_active_mask); } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 32a4423710f..4427e045405 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -191,6 +191,12 @@ struct dm_target { /* Used to provide an error string from the ctr */ char *error; + + /* + * Set if this target needs to receive discards regardless of + * whether or not its underlying devices have support. + */ + unsigned discards_supported:1; }; /* Each target can link one of these into the table */ diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index 5c9186b93ff..f4b0aa3126f 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -69,8 +69,7 @@ struct dm_io_request { * * Create/destroy may block. 
*/ -struct dm_io_client *dm_io_client_create(unsigned num_pages); -int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client); +struct dm_io_client *dm_io_client_create(void); void dm_io_client_destroy(struct dm_io_client *client); /* diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 5db21631169..298d587e349 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -25,8 +25,7 @@ * To use kcopyd you must first create a dm_kcopyd_client object. */ struct dm_kcopyd_client; -int dm_kcopyd_client_create(unsigned num_pages, - struct dm_kcopyd_client **result); +struct dm_kcopyd_client *dm_kcopyd_client_create(void); void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); /* diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 5619f852273..bbd8661b347 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -9,8 +9,12 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) +#define VTD_STRIDE_SHIFT (9) +#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) + #define DMA_PTE_READ (1) #define DMA_PTE_WRITE (2) +#define DMA_PTE_LARGE_PAGE (1 << 7) #define DMA_PTE_SNP (1 << 11) #define CONTEXT_TT_MULTI_LEVEL 0 diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 6998d9376ef..4bfe0a2f7d5 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -3,6 +3,7 @@ * AVR32 systems.) 
* * Copyright (C) 2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/include/linux/efi.h b/include/linux/efi.h index 33fa1203024..e376270cd26 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -299,6 +299,7 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern unsigned long efi_get_time(void); extern int efi_set_rtc_mmss(unsigned long nowtime); +extern void efi_reserve_boot_services(void); extern struct efi_memory_map memmap; /** diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index 85c1d302c12..5e06acf95d0 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -909,7 +909,7 @@ extern int ext3_setattr (struct dentry *, struct iattr *); extern void ext3_evict_inode (struct inode *); extern int ext3_sync_inode (handle_t *, struct inode *); extern void ext3_discard_reservation (struct inode *); -extern void ext3_dirty_inode(struct inode *); +extern void ext3_dirty_inode(struct inode *, int); extern int ext3_change_inode_journal_flag(struct inode *, int); extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *); extern int ext3_can_truncate(struct inode *inode); diff --git a/include/linux/fs.h b/include/linux/fs.h index 241609346df..c55d6b7cd5d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -237,6 +237,7 @@ struct inodes_stat_t { #define S_PRIVATE 512 /* Inode is fs-internal */ #define S_IMA 1024 /* Inode has an associated IMA struct */ #define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ +#define S_NOSEC 4096 /* no suid or xattr security attributes */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -273,6 +274,7 @@ struct inodes_stat_t { #define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) 
#define IS_IMA(inode) ((inode)->i_flags & S_IMA) #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) +#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. */ @@ -1618,7 +1620,7 @@ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*dirty_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); int (*write_inode) (struct inode *, struct writeback_control *wbc); int (*drop_inode) (struct inode *); void (*evict_inode) (struct inode *); @@ -2582,5 +2584,16 @@ int __init get_filesystem_list(char *buf); #define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ (flag & __FMODE_NONOTIFY))) +static inline int is_sxid(mode_t mode) +{ + return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); +} + +static inline void inode_has_no_xattr(struct inode *inode) +{ + if (!is_sxid(inode->i_mode)) + inode->i_flags |= S_NOSEC; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index b5a550a39a7..59d3ef100eb 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -16,6 +16,11 @@ struct trace_print_flags { const char *name; }; +struct trace_print_flags_u64 { + unsigned long long mask; + const char *name; +}; + const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, unsigned long flags, const struct trace_print_flags *flag_array); @@ -23,6 +28,13 @@ const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim, const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, const struct trace_print_flags *symbol_array); +#if BITS_PER_LONG == 32 +const char *ftrace_print_symbols_seq_u64(struct trace_seq *p, + unsigned long long val, + const struct trace_print_flags_u64 + 
*symbol_array); +#endif + const char *ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int len); diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 32720baf70f..32d47e71066 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -25,9 +25,9 @@ struct gpio_chip; * warning when something is wrongly called. */ -static inline int gpio_is_valid(int number) +static inline bool gpio_is_valid(int number) { - return 0; + return false; } static inline int gpio_request(unsigned gpio, const char *label) @@ -41,7 +41,7 @@ static inline int gpio_request_one(unsigned gpio, return -ENOSYS; } -static inline int gpio_request_array(struct gpio *array, size_t num) +static inline int gpio_request_array(const struct gpio *array, size_t num) { return -ENOSYS; } @@ -54,7 +54,7 @@ static inline void gpio_free(unsigned gpio) WARN_ON(1); } -static inline void gpio_free_array(struct gpio *array, size_t num) +static inline void gpio_free_array(const struct gpio *array, size_t num) { might_sleep(); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 2a78aae78c6..027935c86c6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -264,6 +264,8 @@ struct mm_struct { struct linux_binfmt *binfmt; + cpumask_var_t cpu_vm_mask_var; + /* Architecture-specific MM context */ mm_context_t context; @@ -311,10 +313,18 @@ struct mm_struct { #ifdef CONFIG_TRANSPARENT_HUGEPAGE pgtable_t pmd_huge_pte; /* protected by page_table_lock */ #endif - - cpumask_var_t cpu_vm_mask_var; +#ifdef CONFIG_CPUMASK_OFFSTACK + struct cpumask cpumask_allocation; +#endif }; +static inline void mm_init_cpumask(struct mm_struct *mm) +{ +#ifdef CONFIG_CPUMASK_OFFSTACK + mm->cpu_vm_mask_var = &mm->cpumask_allocation; +#endif +} + /* Future-safe accessor for struct mm_struct's cpu_vm_mask. 
*/ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) { diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index d40bfa1d9c9..e5f21d293c7 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -19,6 +19,7 @@ #include <linux/mtd/partitions.h> struct map_info; +struct platform_device; struct physmap_flash_data { unsigned int width; diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 178fafe0ff9..504b289ba68 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -562,6 +562,7 @@ enum { NFSPROC4_CLNT_LAYOUTGET, NFSPROC4_CLNT_GETDEVICEINFO, NFSPROC4_CLNT_LAYOUTCOMMIT, + NFSPROC4_CLNT_LAYOUTRETURN, }; /* nfs41 types */ @@ -570,9 +571,11 @@ struct nfs4_sessionid { }; /* Create Session Flags */ -#define SESSION4_PERSIST 0x001 -#define SESSION4_BACK_CHAN 0x002 -#define SESSION4_RDMA 0x004 +#define SESSION4_PERSIST 0x001 +#define SESSION4_BACK_CHAN 0x002 +#define SESSION4_RDMA 0x004 + +#define SESSION4_FLAG_MASK_A 0x007 enum state_protect_how4 { SP4_NONE = 0, diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 91af2e49fa3..3a34e80ae92 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -68,7 +68,7 @@ struct nfs_pageio_descriptor { int pg_ioflags; int pg_error; struct pnfs_layout_segment *pg_lseg; - int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7e371f7df9c..5e8444a11ad 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -269,6 +269,27 @@ struct nfs4_layoutcommit_data { struct nfs4_layoutcommit_res res; }; +struct nfs4_layoutreturn_args { + __u32 layout_type; + struct inode *inode; + nfs4_stateid stateid; + struct nfs4_sequence_args seq_args; +}; + +struct nfs4_layoutreturn_res { + struct 
nfs4_sequence_res seq_res; + u32 lrs_present; + nfs4_stateid stateid; +}; + +struct nfs4_layoutreturn { + struct nfs4_layoutreturn_args args; + struct nfs4_layoutreturn_res res; + struct rpc_cred *cred; + struct nfs_client *clp; + int rpc_status; +}; + /* * Arguments to the open call. */ @@ -1087,6 +1108,7 @@ struct nfs_read_data { const struct rpc_call_ops *mds_ops; int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data); __u64 mds_offset; + int pnfs_error; struct page *page_array[NFS_PAGEVEC_SIZE]; }; @@ -1112,6 +1134,7 @@ struct nfs_write_data { unsigned long timestamp; /* For lease renewal */ #endif __u64 mds_offset; /* Filelayout dense stripe */ + int pnfs_error; struct page *page_array[NFS_PAGEVEC_SIZE]; }; diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 79a6700b716..6081493db68 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -308,7 +308,7 @@ static inline void SetPageUptodate(struct page *page) { #ifdef CONFIG_S390 if (!test_and_set_bit(PG_uptodate, &page->flags)) - page_set_storage_key(page_to_pfn(page), PAGE_DEFAULT_KEY, 0); + page_set_storage_key(page_to_phys(page), PAGE_DEFAULT_KEY, 0); #else /* * Memory barrier must be issued before setting the PG_uptodate bit, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 24787b75128..a311008af5e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2483,6 +2483,7 @@ #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 +#define PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI 0x1e31 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MIN 0x1e40 #define PCI_DEVICE_ID_INTEL_PANTHERPOINT_LPC_MAX 0x1e5f #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN 0x2310 diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h index 77cbddb3784..a7d87f911ca 100644 --- a/include/linux/pm_qos_params.h +++ 
b/include/linux/pm_qos_params.h @@ -16,6 +16,10 @@ #define PM_QOS_NUM_CLASSES 4 #define PM_QOS_DEFAULT_VALUE -1 +#define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) +#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 + struct pm_qos_request_list { struct plist_node list; int pm_qos_class; diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h new file mode 100644 index 00000000000..76efbdd0162 --- /dev/null +++ b/include/linux/pnfs_osd_xdr.h @@ -0,0 +1,345 @@ +/* + * pNFS-osd on-the-wire data structures + * + * Copyright (C) 2007 Panasas Inc. [year of first publication] + * All rights reserved. + * + * Benny Halevy <bhalevy@panasas.com> + * Boaz Harrosh <bharrosh@panasas.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * See the file COPYING included with this distribution for more details. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the Panasas company nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __PNFS_OSD_XDR_H__ +#define __PNFS_OSD_XDR_H__ + +#include <linux/nfs_fs.h> +#include <linux/nfs_page.h> +#include <scsi/osd_protocol.h> + +#define PNFS_OSD_OSDNAME_MAXSIZE 256 + +/* + * draft-ietf-nfsv4-minorversion-22 + * draft-ietf-nfsv4-pnfs-obj-12 + */ + +/* Layout Structure */ + +enum pnfs_osd_raid_algorithm4 { + PNFS_OSD_RAID_0 = 1, + PNFS_OSD_RAID_4 = 2, + PNFS_OSD_RAID_5 = 3, + PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */ +}; + +/* struct pnfs_osd_data_map4 { + * uint32_t odm_num_comps; + * length4 odm_stripe_unit; + * uint32_t odm_group_width; + * uint32_t odm_group_depth; + * uint32_t odm_mirror_cnt; + * pnfs_osd_raid_algorithm4 odm_raid_algorithm; + * }; + */ +struct pnfs_osd_data_map { + u32 odm_num_comps; + u64 odm_stripe_unit; + u32 odm_group_width; + u32 odm_group_depth; + u32 odm_mirror_cnt; + u32 odm_raid_algorithm; +}; + +/* struct pnfs_osd_objid4 { + * deviceid4 oid_device_id; + * uint64_t oid_partition_id; + * uint64_t oid_object_id; + * }; + */ +struct pnfs_osd_objid { + struct nfs4_deviceid oid_device_id; + u64 oid_partition_id; + u64 oid_object_id; +}; + +/* For printout. 
I use: + * kprint("dev(%llx:%llx)", _DEVID_LO(pointer), _DEVID_HI(pointer)); + * BE style + */ +#define _DEVID_LO(oid_device_id) \ + (unsigned long long)be64_to_cpup((__be64 *)(oid_device_id)->data) + +#define _DEVID_HI(oid_device_id) \ + (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1) + +static inline int +pnfs_osd_objid_xdr_sz(void) +{ + return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2; +} + +enum pnfs_osd_version { + PNFS_OSD_MISSING = 0, + PNFS_OSD_VERSION_1 = 1, + PNFS_OSD_VERSION_2 = 2 +}; + +struct pnfs_osd_opaque_cred { + u32 cred_len; + void *cred; +}; + +enum pnfs_osd_cap_key_sec { + PNFS_OSD_CAP_KEY_SEC_NONE = 0, + PNFS_OSD_CAP_KEY_SEC_SSV = 1, +}; + +/* struct pnfs_osd_object_cred4 { + * pnfs_osd_objid4 oc_object_id; + * pnfs_osd_version4 oc_osd_version; + * pnfs_osd_cap_key_sec4 oc_cap_key_sec; + * opaque oc_capability_key<>; + * opaque oc_capability<>; + * }; + */ +struct pnfs_osd_object_cred { + struct pnfs_osd_objid oc_object_id; + u32 oc_osd_version; + u32 oc_cap_key_sec; + struct pnfs_osd_opaque_cred oc_cap_key; + struct pnfs_osd_opaque_cred oc_cap; +}; + +/* struct pnfs_osd_layout4 { + * pnfs_osd_data_map4 olo_map; + * uint32_t olo_comps_index; + * pnfs_osd_object_cred4 olo_components<>; + * }; + */ +struct pnfs_osd_layout { + struct pnfs_osd_data_map olo_map; + u32 olo_comps_index; + u32 olo_num_comps; + struct pnfs_osd_object_cred *olo_comps; +}; + +/* Device Address */ +enum pnfs_osd_targetid_type { + OBJ_TARGET_ANON = 1, + OBJ_TARGET_SCSI_NAME = 2, + OBJ_TARGET_SCSI_DEVICE_ID = 3, +}; + +/* union pnfs_osd_targetid4 switch (pnfs_osd_targetid_type4 oti_type) { + * case OBJ_TARGET_SCSI_NAME: + * string oti_scsi_name<>; + * + * case OBJ_TARGET_SCSI_DEVICE_ID: + * opaque oti_scsi_device_id<>; + * + * default: + * void; + * }; + * + * union pnfs_osd_targetaddr4 switch (bool ota_available) { + * case TRUE: + * netaddr4 ota_netaddr; + * case FALSE: + * void; + * }; + * + * struct pnfs_osd_deviceaddr4 { + * pnfs_osd_targetid4 
oda_targetid; + * pnfs_osd_targetaddr4 oda_targetaddr; + * uint64_t oda_lun; + * opaque oda_systemid<>; + * pnfs_osd_object_cred4 oda_root_obj_cred; + * opaque oda_osdname<>; + * }; + */ +struct pnfs_osd_targetid { + u32 oti_type; + struct nfs4_string oti_scsi_device_id; +}; + +enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 }; + +/* struct netaddr4 { + * // see struct rpcb in RFC1833 + * string r_netid<>; // network id + * string r_addr<>; // universal address + * }; + */ +struct pnfs_osd_net_addr { + struct nfs4_string r_netid; + struct nfs4_string r_addr; +}; + +struct pnfs_osd_targetaddr { + u32 ota_available; + struct pnfs_osd_net_addr ota_netaddr; +}; + +enum { + NETWORK_ID_MAX = 16 / 4, + UNIVERSAL_ADDRESS_MAX = 64 / 4, + PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX, +}; + +struct pnfs_osd_deviceaddr { + struct pnfs_osd_targetid oda_targetid; + struct pnfs_osd_targetaddr oda_targetaddr; + u8 oda_lun[8]; + struct nfs4_string oda_systemid; + struct pnfs_osd_object_cred oda_root_obj_cred; + struct nfs4_string oda_osdname; +}; + +enum { + ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4, + PNFS_OSD_DEVICEADDR_MAX = + PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX + + 2 /*oda_lun*/ + + 1 + OSD_SYSTEMID_LEN + + 1 + ODA_OSDNAME_MAX, +}; + +/* LAYOUTCOMMIT: layoutupdate */ + +/* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) { + * case TRUE: + * int64_t dsu_delta; + * case FALSE: + * void; + * }; + * + * struct pnfs_osd_layoutupdate4 { + * pnfs_osd_deltaspaceused4 olu_delta_space_used; + * bool olu_ioerr_flag; + * }; + */ +struct pnfs_osd_layoutupdate { + u32 dsu_valid; + s64 dsu_delta; + u32 olu_ioerr_flag; +}; + +/* LAYOUTRETURN: I/O Error Report */ + +enum pnfs_osd_errno { + PNFS_OSD_ERR_EIO = 1, + PNFS_OSD_ERR_NOT_FOUND = 2, + PNFS_OSD_ERR_NO_SPACE = 3, + PNFS_OSD_ERR_BAD_CRED = 4, + PNFS_OSD_ERR_NO_ACCESS = 5, + PNFS_OSD_ERR_UNREACHABLE = 6, + PNFS_OSD_ERR_RESOURCE = 7 +}; + +/* struct pnfs_osd_ioerr4 { + * 
pnfs_osd_objid4 oer_component; + * length4 oer_comp_offset; + * length4 oer_comp_length; + * bool oer_iswrite; + * pnfs_osd_errno4 oer_errno; + * }; + */ +struct pnfs_osd_ioerr { + struct pnfs_osd_objid oer_component; + u64 oer_comp_offset; + u64 oer_comp_length; + u32 oer_iswrite; + u32 oer_errno; +}; + +/* OSD XDR API */ +/* Layout helpers */ +/* Layout decoding is done in two parts: + * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part + * of the layout. @iter members need not be initialized. + * Returned: + * @layout members are set. (@layout->olo_comps set to NULL). + * + * Zero on success, or negative error if passed xdr is broken. + * + * 2. 2nd Call pnfs_osd_xdr_decode_layout_comp() in a loop until it returns + * false, to decode the next component. + * Returned: + * true if there is more to decode or false if we are done or error. + * + * Example: + * struct pnfs_osd_xdr_decode_layout_iter iter; + * struct pnfs_osd_layout layout; + * struct pnfs_osd_object_cred comp; + * int status; + * + * status = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr); + * if (unlikely(status)) + * goto err; + * while(pnfs_osd_xdr_decode_layout_comp(&comp, &iter, xdr, &status)) { + * // All of @comp strings point to inside the xdr_buffer + * // or scratch buffer. Copy them out to user memory e.g. + * copy_single_comp(dest_comp++, &comp); + * } + * if (unlikely(status)) + * goto err; + */ + +struct pnfs_osd_xdr_decode_layout_iter { + unsigned total_comps; + unsigned decoded_comps; +}; + +extern int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr); + +extern bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp, + struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr, + int *err); + +/* Device Info helpers */ + +/* Note: All strings inside @deviceaddr point to space inside @p. + * @p should stay valid while @deviceaddr is in use. 
+ */ +extern void pnfs_osd_xdr_decode_deviceaddr( + struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p); + +/* layoutupdate (layout_commit) xdr helpers */ +extern int +pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, + struct pnfs_osd_layoutupdate *lou); + +/* osd_ioerror encoding/decoding (layout_return) */ +/* Client */ +extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr); +extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr); + +#endif /* __PNFS_OSD_XDR_H__ */ diff --git a/include/linux/sched.h b/include/linux/sched.h index dc8871295a5..2a8621c4be1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1546,7 +1546,7 @@ struct task_struct { #ifdef CONFIG_TRACING /* state flags for use by tracers */ unsigned long trace; - /* bitmask of trace recursion */ + /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ @@ -1841,9 +1841,16 @@ static inline void rcu_copy_process(struct task_struct *p) #endif #ifdef CONFIG_SMP +extern void do_set_cpus_allowed(struct task_struct *p, + const struct cpumask *new_mask); + extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); #else +static inline void do_set_cpus_allowed(struct task_struct *p, + const struct cpumask *new_mask) +{ +} static inline int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) { @@ -2187,7 +2194,6 @@ static inline void mmdrop(struct mm_struct * mm) if (unlikely(atomic_dec_and_test(&mm->mm_count))) __mmdrop(mm); } -extern int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm); /* mmput gets rid of the mappings and all user-space */ extern void mmput(struct mm_struct *); diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 77e62488339..c68a147939a 100644 --- a/include/linux/sunrpc/msg_prot.h +++ 
b/include/linux/sunrpc/msg_prot.h @@ -145,6 +145,7 @@ typedef __be32 rpc_fraghdr; #define RPCBIND_NETID_TCP "tcp" #define RPCBIND_NETID_UDP6 "udp6" #define RPCBIND_NETID_TCP6 "tcp6" +#define RPCBIND_NETID_LOCAL "local" /* * Note that RFC 1833 does not put any size restrictions on the diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 04dba23c59f..85c50b40759 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,6 +28,7 @@ struct svc_sock { /* private TCP part */ u32 sk_reclen; /* length of record */ u32 sk_tcplen; /* current read length */ + struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */ }; /* diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index fc84b7a19ca..a20970ef9e4 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -216,6 +216,8 @@ extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes); extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, unsigned int base, unsigned int len); extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p); +extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, unsigned int len); extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index a0f998c07c6..81cce3b3ee6 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -141,7 +141,8 @@ enum xprt_transports { XPRT_TRANSPORT_UDP = IPPROTO_UDP, XPRT_TRANSPORT_TCP = IPPROTO_TCP, XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, - XPRT_TRANSPORT_RDMA = 256 + XPRT_TRANSPORT_RDMA = 256, + XPRT_TRANSPORT_LOCAL = 257, }; struct rpc_xprt { diff --git a/include/linux/tty_ldisc.h 
b/include/linux/tty_ldisc.h index 5b07792ccb4..ff7dc08696a 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -76,7 +76,7 @@ * tty device. It is solely the responsibility of the line * discipline to handle poll requests. * - * unsigned int (*receive_buf)(struct tty_struct *, const unsigned char *cp, + * void (*receive_buf)(struct tty_struct *, const unsigned char *cp, * char *fp, int count); * * This function is called by the low-level tty driver to send @@ -84,8 +84,7 @@ * processing. <cp> is a pointer to the buffer of input * character received by the device. <fp> is a pointer to a * pointer of flag bytes which indicate whether a character was - * received with a parity error, etc. Returns the amount of bytes - * received. + * received with a parity error, etc. * * void (*write_wakeup)(struct tty_struct *); * @@ -141,8 +140,8 @@ struct tty_ldisc_ops { /* * The following routines are called from below. */ - unsigned int (*receive_buf)(struct tty_struct *, - const unsigned char *cp, char *fp, int count); + void (*receive_buf)(struct tty_struct *, const unsigned char *cp, + char *fp, int count); void (*write_wakeup)(struct tty_struct *); void (*dcd_change)(struct tty_struct *, unsigned int, struct pps_event_time *); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index d512d98dfb7..5ca0951e185 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -93,8 +93,8 @@ static inline unsigned long __copy_from_user_nocache(void *to, * Safely read from address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. 
*/ -extern long probe_kernel_read(void *dst, void *src, size_t size); -extern long __probe_kernel_read(void *dst, void *src, size_t size); +extern long probe_kernel_read(void *dst, const void *src, size_t size); +extern long __probe_kernel_read(void *dst, const void *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location @@ -105,7 +105,7 @@ extern long __probe_kernel_read(void *dst, void *src, size_t size); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long notrace probe_kernel_write(void *dst, void *src, size_t size); -extern long notrace __probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace probe_kernel_write(void *dst, const void *src, size_t size); +extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size); #endif /* __LINUX_UACCESS_H__ */ diff --git a/include/linux/virtio.h b/include/linux/virtio.h index aff5b4f7404..71088574960 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -51,6 +51,13 @@ struct virtqueue { * This re-enables callbacks; it returns "false" if there are pending * buffers in the queue, to detect a possible race between the driver * checking for more work, and enabling callbacks. + * virtqueue_enable_cb_delayed: restart callbacks after disable_cb. + * vq: the struct virtqueue we're talking about. + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. * virtqueue_detach_unused_buf: detach first unused buffer * vq: the struct virtqueue we're talking about. 
* Returns NULL or the "data" token handed to add_buf @@ -86,6 +93,8 @@ void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); +bool virtqueue_enable_cb_delayed(struct virtqueue *vq); + void *virtqueue_detach_unused_buf(struct virtqueue *vq); /** diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index e68b439b286..277c4ad44e8 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_9P_H #define _LINUX_VIRTIO_9P_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include <linux/types.h> #include <linux/virtio_ids.h> #include <linux/virtio_config.h> diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index a50ecd1b81a..652dc8bea92 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_BALLOON_H #define _LINUX_VIRTIO_BALLOON_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include <linux/virtio_ids.h> #include <linux/virtio_config.h> diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 167720d695e..e0edb40ca7a 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_BLK_H #define _LINUX_VIRTIO_BLK_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include <linux/types.h> #include <linux/virtio_ids.h> #include <linux/virtio_config.h> diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 800617b4ddd..39c88c5ad19 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_CONFIG_H #define _LINUX_VIRTIO_CONFIG_H /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. */ + * anyone can use the definitions to implement compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ /* Virtio devices use a standardized configuration space to define their * features and pass configuration information, but each implementation can diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index e4d333543a3..bdf4b003473 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -5,7 +5,31 @@ #include <linux/virtio_config.h> /* * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * Copyright (C) Red Hat, Inc., 2009, 2010, 2011 * Copyright (C) Amit Shah <amit.shah@redhat.com>, 2009, 2010, 2011 diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h index 06660c0a78d..85bb0bb66ff 100644 --- a/include/linux/virtio_ids.h +++ b/include/linux/virtio_ids.h @@ -5,7 +5,29 @@ * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. - */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #define VIRTIO_ID_NET 1 /* virtio net */ #define VIRTIO_ID_BLOCK 2 /* virtio block */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 085e42298ce..136040bba3e 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include <linux/types.h> #include <linux/virtio_ids.h> #include <linux/virtio_config.h> diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index 9a3d7c48c62..ea66f3f60d6 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -11,6 +11,29 @@ * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #ifndef _LINUX_VIRTIO_PCI_H diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index e4d144b132b..4a32cb6da42 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -7,6 +7,29 @@ * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * * Copyright Rusty Russell IBM Corporation 2007. */ #include <linux/types.h> @@ -29,6 +52,12 @@ /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC 28 +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ struct vring_desc { /* Address (guest-physical). */ @@ -83,6 +112,7 @@ struct vring { * __u16 avail_flags; * __u16 avail_idx; * __u16 available[num]; + * __u16 used_event_idx; * * // Padding to the next align boundary. * char pad[]; @@ -91,8 +121,14 @@ struct vring { * __u16 used_flags; * __u16 used_idx; * struct vring_used_elem used[num]; + * __u16 avail_event_idx; * }; */ +/* We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. 
*/ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num]) + static inline void vring_init(struct vring *vr, unsigned int num, void *p, unsigned long align) { @@ -107,7 +143,21 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) { return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) + align - 1) & ~(align - 1)) - + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; + + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; +} + +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other side, if + * we have just incremented index from old to new_idx, + * should we trigger an event? */ +static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0.
*/ + return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); } #ifdef __KERNEL__ diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index f445cff66ab..4114129f079 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -28,7 +28,7 @@ struct extent_buffer; { BTRFS_SHARED_DATA_REF_KEY, "SHARED_DATA_REF" }) #define __show_root_type(obj) \ - __print_symbolic(obj, \ + __print_symbolic_u64(obj, \ { BTRFS_ROOT_TREE_OBJECTID, "ROOT_TREE" }, \ { BTRFS_EXTENT_TREE_OBJECTID, "EXTENT_TREE" }, \ { BTRFS_CHUNK_TREE_OBJECTID, "CHUNK_TREE" }, \ @@ -125,7 +125,7 @@ DEFINE_EVENT(btrfs__inode, btrfs_inode_evict, ); #define __show_map_type(type) \ - __print_symbolic(type, \ + __print_symbolic_u64(type, \ { EXTENT_MAP_LAST_BYTE, "LAST_BYTE" }, \ { EXTENT_MAP_HOLE, "HOLE" }, \ { EXTENT_MAP_INLINE, "INLINE" }, \ diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 3e68366d485..533c49f4804 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -205,6 +205,19 @@ ftrace_print_symbols_seq(p, value, symbols); \ }) +#undef __print_symbolic_u64 +#if BITS_PER_LONG == 32 +#define __print_symbolic_u64(value, symbol_array...) \ + ({ \ + static const struct trace_print_flags_u64 symbols[] = \ + { symbol_array, { -1, NULL } }; \ + ftrace_print_symbols_seq_u64(p, value, symbols); \ + }) +#else +#define __print_symbolic_u64(value, symbol_array...) 
\ + __print_symbolic(value, symbol_array) +#endif + #undef __print_hex #define __print_hex(buf, buf_len) ftrace_print_hex_seq(p, buf, buf_len) diff --git a/init/main.c b/init/main.c index d2f1e086bf3..cafba67c13b 100644 --- a/init/main.c +++ b/init/main.c @@ -487,6 +487,7 @@ asmlinkage void __init start_kernel(void) printk(KERN_NOTICE "%s", linux_banner); setup_arch(&command_line); mm_init_owner(&init_mm, &init_task); + mm_init_cpumask(&init_mm); setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); @@ -510,7 +511,6 @@ asmlinkage void __init start_kernel(void) sort_main_extable(); trap_init(); mm_init(); - BUG_ON(mm_init_cpumask(&init_mm, 0)); /* * Set up the scheduler prior starting any interrupts (such as the diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1ceeb049c82..9c9b7545c81 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2190,7 +2190,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) rcu_read_lock(); cs = task_cs(tsk); if (cs) - cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed); + do_set_cpus_allowed(tsk, cs->cpus_allowed); rcu_read_unlock(); /* @@ -2217,7 +2217,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) * Like above we can temporary set any mask and rely on * set_cpus_allowed_ptr() as synchronization point. 
*/ - cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask); + do_set_cpus_allowed(tsk, cpu_possible_mask); cpu = cpumask_any(cpu_active_mask); } diff --git a/kernel/events/core.c b/kernel/events/core.c index c09767f7db3..d863b3c057b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5028,6 +5028,14 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, else perf_event_output(event, nmi, data, regs); + if (event->fasync && event->pending_kill) { + if (nmi) { + event->pending_wakeup = 1; + irq_work_queue(&event->pending); + } else + perf_event_wakeup(event); + } + return ret; } diff --git a/kernel/fork.c b/kernel/fork.c index ca406d91671..0276c30401a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -484,20 +484,6 @@ static void mm_init_aio(struct mm_struct *mm) #endif } -int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm) -{ -#ifdef CONFIG_CPUMASK_OFFSTACK - if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL)) - return -ENOMEM; - - if (oldmm) - cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm)); - else - memset(mm_cpumask(mm), 0, cpumask_size()); -#endif - return 0; -} - static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) { atomic_set(&mm->mm_users, 1); @@ -538,17 +524,8 @@ struct mm_struct * mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - mm = mm_init(mm, current); - if (!mm) - return NULL; - - if (mm_init_cpumask(mm, NULL)) { - mm_free_pgd(mm); - free_mm(mm); - return NULL; - } - - return mm; + mm_init_cpumask(mm); + return mm_init(mm, current); } /* @@ -559,7 +536,6 @@ struct mm_struct * mm_alloc(void) void __mmdrop(struct mm_struct *mm) { BUG_ON(mm == &init_mm); - free_cpumask_var(mm->cpu_vm_mask_var); mm_free_pgd(mm); destroy_context(mm); mmu_notifier_mm_destroy(mm); @@ -753,6 +729,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk) goto fail_nomem; memcpy(mm, oldmm, sizeof(*mm)); + mm_init_cpumask(mm); /* Initializing for Swap token stuff */ mm->token_priority = 0; @@ -765,9 
+742,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk) if (!mm_init(mm, tsk)) goto fail_nomem; - if (mm_init_cpumask(mm, oldmm)) - goto fail_nocpumask; - if (init_new_context(tsk, mm)) goto fail_nocontext; @@ -794,9 +768,6 @@ fail_nomem: return NULL; fail_nocontext: - free_cpumask_var(mm->cpu_vm_mask_var); - -fail_nocpumask: /* * If init_new_context() failed, we cannot use mmput() to free the mm * because it calls destroy_context() @@ -1591,6 +1562,13 @@ void __init proc_caches_init(void) fs_cachep = kmem_cache_create("fs_cache", sizeof(struct fs_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); + /* + * FIXME! The "sizeof(struct mm_struct)" currently includes the + * whole struct cpumask for the OFFSTACK case. We could change + * this to *only* allocate as much of it as required by the + * maximum number of CPU's we can ever have. The cpumask_allocation + * is at the end of the structure, exactly for that reason. + */ mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 74d1c099fbd..fa27e750dbc 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -105,9 +105,12 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start, } static void __jump_label_update(struct jump_label_key *key, - struct jump_entry *entry, int enable) + struct jump_entry *entry, + struct jump_entry *stop, int enable) { - for (; entry->key == (jump_label_t)(unsigned long)key; entry++) { + for (; (entry < stop) && + (entry->key == (jump_label_t)(unsigned long)key); + entry++) { /* * entry->code set to 0 invalidates module init text sections * kernel_text_address() verifies we are not in core kernel @@ -181,7 +184,11 @@ static void __jump_label_mod_update(struct jump_label_key *key, int enable) struct jump_label_mod *mod = key->next; while (mod) { - __jump_label_update(key, mod->entries, enable); + struct 
module *m = mod->mod; + + __jump_label_update(key, mod->entries, + m->jump_entries + m->num_jump_entries, + enable); mod = mod->next; } } @@ -245,7 +252,8 @@ static int jump_label_add_module(struct module *mod) key->next = jlm; if (jump_label_enabled(key)) - __jump_label_update(key, iter, JUMP_LABEL_ENABLE); + __jump_label_update(key, iter, iter_stop, + JUMP_LABEL_ENABLE); } return 0; @@ -371,7 +379,7 @@ static void jump_label_update(struct jump_label_key *key, int enable) /* if there are no users, entry can be NULL */ if (entry) - __jump_label_update(key, entry, enable); + __jump_label_update(key, entry, __stop___jump_table, enable); #ifdef CONFIG_MODULES __jump_label_mod_update(key, enable); diff --git a/kernel/kthread.c b/kernel/kthread.c index 3b34d2732bc..4ba7cccb499 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -202,8 +202,8 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) return; } - p->cpus_allowed = cpumask_of_cpu(cpu); - p->rt.nr_cpus_allowed = 1; + /* It's safe because the task is inactive. */ + do_set_cpus_allowed(p, cpumask_of(cpu)); p->flags |= PF_THREAD_BOUND; } EXPORT_SYMBOL(kthread_bind); diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index fd8d1e035df..6824ca7d4d0 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -54,11 +54,17 @@ enum pm_qos_type { PM_QOS_MIN /* return the smallest value */ }; +/* + * Note: The lockless read path depends on the CPU accessing + * target_value atomically. 
Atomic access is only guaranteed on all CPU + * types linux supports for 32 bit quantities + */ struct pm_qos_object { struct plist_head requests; struct blocking_notifier_head *notifiers; struct miscdevice pm_qos_power_miscdev; char *name; + s32 target_value; /* Do not change to 64 bit */ s32 default_value; enum pm_qos_type type; }; @@ -71,7 +77,8 @@ static struct pm_qos_object cpu_dma_pm_qos = { .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), .notifiers = &cpu_dma_lat_notifier, .name = "cpu_dma_latency", - .default_value = 2000 * USEC_PER_SEC, + .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, .type = PM_QOS_MIN, }; @@ -80,7 +87,8 @@ static struct pm_qos_object network_lat_pm_qos = { .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), .notifiers = &network_lat_notifier, .name = "network_latency", - .default_value = 2000 * USEC_PER_SEC, + .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, .type = PM_QOS_MIN }; @@ -90,7 +98,8 @@ static struct pm_qos_object network_throughput_pm_qos = { .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), .notifiers = &network_throughput_notifier, .name = "network_throughput", - .default_value = 0, + .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, .type = PM_QOS_MAX, }; @@ -136,6 +145,16 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) } } +static inline s32 pm_qos_read_value(struct pm_qos_object *o) +{ + return o->target_value; +} + +static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) +{ + o->target_value = value; +} + static void update_target(struct pm_qos_object *o, struct plist_node *node, int del, int value) { @@ -160,6 +179,7 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node, plist_add(node, &o->requests); } curr_value =
pm_qos_get_value(o); + pm_qos_set_value(o, curr_value); spin_unlock_irqrestore(&pm_qos_lock, flags); if (prev_value != curr_value) @@ -194,18 +214,11 @@ static int find_pm_qos_object_by_minor(int minor) * pm_qos_request - returns current system wide qos expectation * @pm_qos_class: identification of which qos value is requested * - * This function returns the current target value in an atomic manner. + * This function returns the current target value. */ int pm_qos_request(int pm_qos_class) { - unsigned long flags; - int value; - - spin_lock_irqsave(&pm_qos_lock, flags); - value = pm_qos_get_value(pm_qos_array[pm_qos_class]); - spin_unlock_irqrestore(&pm_qos_lock, flags); - - return value; + return pm_qos_read_value(pm_qos_array[pm_qos_class]); } EXPORT_SYMBOL_GPL(pm_qos_request); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f07d2f03181..89419ff92e9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -36,7 +36,7 @@ #include <linux/interrupt.h> #include <linux/sched.h> #include <linux/nmi.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/bitops.h> #include <linux/module.h> #include <linux/completion.h> @@ -95,7 +95,6 @@ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); -static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq); DEFINE_PER_CPU(char, rcu_cpu_has_work); static char rcu_kthreads_spawnable; @@ -163,7 +162,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); #ifdef CONFIG_NO_HZ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = 1, - .dynticks = 1, + .dynticks = ATOMIC_INIT(1), }; #endif /* #ifdef CONFIG_NO_HZ */ @@ -322,13 +321,25 @@ void rcu_enter_nohz(void) unsigned long flags; struct rcu_dynticks *rdtp; - smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); - 
rdtp->dynticks++; - rdtp->dynticks_nesting--; - WARN_ON_ONCE(rdtp->dynticks & 0x1); + if (--rdtp->dynticks_nesting) { + local_irq_restore(flags); + return; + } + /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ + smp_mb__before_atomic_inc(); /* See above. */ + atomic_inc(&rdtp->dynticks); + smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ + WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); local_irq_restore(flags); + + /* If the interrupt queued a callback, get out of dyntick mode. */ + if (in_irq() && + (__get_cpu_var(rcu_sched_data).nxtlist || + __get_cpu_var(rcu_bh_data).nxtlist || + rcu_preempt_needs_cpu(smp_processor_id()))) + set_need_resched(); } /* @@ -344,11 +355,16 @@ void rcu_exit_nohz(void) local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); - rdtp->dynticks++; - rdtp->dynticks_nesting++; - WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); + if (rdtp->dynticks_nesting++) { + local_irq_restore(flags); + return; + } + smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ + atomic_inc(&rdtp->dynticks); + /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ + smp_mb__after_atomic_inc(); /* See above. */ + WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); local_irq_restore(flags); - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ } /** @@ -362,11 +378,15 @@ void rcu_nmi_enter(void) { struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - if (rdtp->dynticks & 0x1) + if (rdtp->dynticks_nmi_nesting == 0 && + (atomic_read(&rdtp->dynticks) & 0x1)) return; - rdtp->dynticks_nmi++; - WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1)); - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ + rdtp->dynticks_nmi_nesting++; + smp_mb__before_atomic_inc(); /* Force delay from prior write. */ + atomic_inc(&rdtp->dynticks); + /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ + smp_mb__after_atomic_inc(); /* See above. 
*/ + WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); } /** @@ -380,11 +400,14 @@ void rcu_nmi_exit(void) { struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - if (rdtp->dynticks & 0x1) + if (rdtp->dynticks_nmi_nesting == 0 || + --rdtp->dynticks_nmi_nesting != 0) return; - smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - rdtp->dynticks_nmi++; - WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1); + /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ + smp_mb__before_atomic_inc(); /* See above. */ + atomic_inc(&rdtp->dynticks); + smp_mb__after_atomic_inc(); /* Force delay to next write. */ + WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); } /** @@ -395,13 +418,7 @@ void rcu_nmi_exit(void) */ void rcu_irq_enter(void) { - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - - if (rdtp->dynticks_nesting++) - return; - rdtp->dynticks++; - WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ + rcu_exit_nohz(); } /** @@ -413,18 +430,7 @@ void rcu_irq_enter(void) */ void rcu_irq_exit(void) { - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - - if (--rdtp->dynticks_nesting) - return; - smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - rdtp->dynticks++; - WARN_ON_ONCE(rdtp->dynticks & 0x1); - - /* If the interrupt queued a callback, get out of dyntick mode. */ - if (__this_cpu_read(rcu_sched_data.nxtlist) || - __this_cpu_read(rcu_bh_data.nxtlist)) - set_need_resched(); + rcu_enter_nohz(); } #ifdef CONFIG_SMP @@ -436,19 +442,8 @@ void rcu_irq_exit(void) */ static int dyntick_save_progress_counter(struct rcu_data *rdp) { - int ret; - int snap; - int snap_nmi; - - snap = rdp->dynticks->dynticks; - snap_nmi = rdp->dynticks->dynticks_nmi; - smp_mb(); /* Order sampling of snap with end of grace period. 
*/ - rdp->dynticks_snap = snap; - rdp->dynticks_nmi_snap = snap_nmi; - ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0); - if (ret) - rdp->dynticks_fqs++; - return ret; + rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); + return 0; } /* @@ -459,16 +454,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) */ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { - long curr; - long curr_nmi; - long snap; - long snap_nmi; + unsigned long curr; + unsigned long snap; - curr = rdp->dynticks->dynticks; - snap = rdp->dynticks_snap; - curr_nmi = rdp->dynticks->dynticks_nmi; - snap_nmi = rdp->dynticks_nmi_snap; - smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ + curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); + snap = (unsigned long)rdp->dynticks_snap; /* * If the CPU passed through or entered a dynticks idle phase with @@ -478,8 +468,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * read-side critical section that started before the beginning * of the current RCU grace period. */ - if ((curr != snap || (curr & 0x1) == 0) && - (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) { + if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { rdp->dynticks_fqs++; return 1; } @@ -908,6 +897,12 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) unsigned long gp_duration; WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); + + /* + * Ensure that all grace-period and pre-grace-period activity + * is seen before the assignment to rsp->completed. + */ + smp_mb(); /* See above block comment. 
*/ gp_duration = jiffies - rsp->gp_start; if (gp_duration > rsp->gp_max) rsp->gp_max = gp_duration; @@ -1455,25 +1450,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) */ static void rcu_process_callbacks(void) { - /* - * Memory references from any prior RCU read-side critical sections - * executed by the interrupted code must be seen before any RCU - * grace-period manipulations below. - */ - smp_mb(); /* See above block comment. */ - __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); rcu_preempt_process_callbacks(); - /* - * Memory references from any later RCU read-side critical sections - * executed by the interrupted code must be seen after any RCU - * grace-period manipulations above. - */ - smp_mb(); /* See above block comment. */ - /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ rcu_needs_cpu_flush(); } @@ -1494,7 +1475,7 @@ static void invoke_rcu_cpu_kthread(void) local_irq_restore(flags); return; } - wake_up(&__get_cpu_var(rcu_cpu_wq)); + wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); local_irq_restore(flags); } @@ -1544,13 +1525,10 @@ static void rcu_cpu_kthread_setrt(int cpu, int to_rt) */ static void rcu_cpu_kthread_timer(unsigned long arg) { - unsigned long flags; struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); struct rcu_node *rnp = rdp->mynode; - raw_spin_lock_irqsave(&rnp->lock, flags); - rnp->wakemask |= rdp->grpmask; - raw_spin_unlock_irqrestore(&rnp->lock, flags); + atomic_or(rdp->grpmask, &rnp->wakemask); invoke_rcu_node_kthread(rnp); } @@ -1617,14 +1595,12 @@ static int rcu_cpu_kthread(void *arg) unsigned long flags; int spincnt = 0; unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); - wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu); char work; char *workp = &per_cpu(rcu_cpu_has_work, cpu); for (;;) { *statusp = RCU_KTHREAD_WAITING; - wait_event_interruptible(*wqp, - *workp 
!= 0 || kthread_should_stop()); + rcu_wait(*workp != 0 || kthread_should_stop()); local_bh_disable(); if (rcu_cpu_kthread_should_stop(cpu)) { local_bh_enable(); @@ -1675,7 +1651,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); per_cpu(rcu_cpu_kthread_task, cpu) = t; - wake_up_process(t); sp.sched_priority = RCU_KTHREAD_PRIO; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); return 0; @@ -1698,11 +1673,10 @@ static int rcu_node_kthread(void *arg) for (;;) { rnp->node_kthread_status = RCU_KTHREAD_WAITING; - wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0); + rcu_wait(atomic_read(&rnp->wakemask) != 0); rnp->node_kthread_status = RCU_KTHREAD_RUNNING; raw_spin_lock_irqsave(&rnp->lock, flags); - mask = rnp->wakemask; - rnp->wakemask = 0; + mask = atomic_xchg(&rnp->wakemask, 0); rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { if ((mask & 0x1) == 0) @@ -1783,13 +1757,14 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, raw_spin_lock_irqsave(&rnp->lock, flags); rnp->node_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); - wake_up_process(t); sp.sched_priority = 99; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); } return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); } +static void rcu_wake_one_boost_kthread(struct rcu_node *rnp); + /* * Spawn all kthreads -- called as soon as the scheduler is running. 
*/ @@ -1797,24 +1772,31 @@ static int __init rcu_spawn_kthreads(void) { int cpu; struct rcu_node *rnp; + struct task_struct *t; rcu_kthreads_spawnable = 1; for_each_possible_cpu(cpu) { - init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu)); per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) + if (cpu_online(cpu)) { (void)rcu_spawn_one_cpu_kthread(cpu); + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t) + wake_up_process(t); + } } rnp = rcu_get_root(rcu_state); - init_waitqueue_head(&rnp->node_wq); - rcu_init_boost_waitqueue(rnp); (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - if (NUM_RCU_NODES > 1) + if (rnp->node_kthread_task) + wake_up_process(rnp->node_kthread_task); + if (NUM_RCU_NODES > 1) { rcu_for_each_leaf_node(rcu_state, rnp) { - init_waitqueue_head(&rnp->node_wq); - rcu_init_boost_waitqueue(rnp); (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + t = rnp->node_kthread_task; + if (t) + wake_up_process(t); + rcu_wake_one_boost_kthread(rnp); } + } return 0; } early_initcall(rcu_spawn_kthreads); @@ -2218,14 +2200,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) raw_spin_unlock_irqrestore(&rsp->onofflock, flags); } -static void __cpuinit rcu_online_cpu(int cpu) +static void __cpuinit rcu_prepare_cpu(int cpu) { rcu_init_percpu_data(cpu, &rcu_sched_state, 0); rcu_init_percpu_data(cpu, &rcu_bh_state, 0); rcu_preempt_init_percpu_data(cpu); } -static void __cpuinit rcu_online_kthreads(int cpu) +static void __cpuinit rcu_prepare_kthreads(int cpu) { struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; @@ -2239,6 +2221,31 @@ static void __cpuinit rcu_online_kthreads(int cpu) } /* + * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state, + * but the RCU threads are woken on demand, and if demand is low this + * could be a while triggering the hung task watchdog. + * + * In order to avoid this, poke all tasks once the CPU is fully + * up and running. 
+ */ +static void __cpuinit rcu_online_kthreads(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); + struct rcu_node *rnp = rdp->mynode; + struct task_struct *t; + + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t) + wake_up_process(t); + + t = rnp->node_kthread_task; + if (t) + wake_up_process(t); + + rcu_wake_one_boost_kthread(rnp); +} + +/* * Handle CPU online/offline notification events. */ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, @@ -2251,10 +2258,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - rcu_online_cpu(cpu); - rcu_online_kthreads(cpu); + rcu_prepare_cpu(cpu); + rcu_prepare_kthreads(cpu); break; case CPU_ONLINE: + rcu_online_kthreads(cpu); case CPU_DOWN_FAILED: rcu_node_kthread_setaffinity(rnp, -1); rcu_cpu_kthread_setrt(cpu, 1); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 257664815d5..7b9a08b4aae 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -84,11 +84,9 @@ * Dynticks per-CPU state. */ struct rcu_dynticks { - int dynticks_nesting; /* Track nesting level, sort of. */ - int dynticks; /* Even value for dynticks-idle, else odd. */ - int dynticks_nmi; /* Even value for either dynticks-idle or */ - /* not in nmi handler, else odd. So this */ - /* remains even for nmi from irq handler. */ + int dynticks_nesting; /* Track irq/process nesting level. */ + int dynticks_nmi_nesting; /* Track NMI nesting level. */ + atomic_t dynticks; /* Even value for dynticks-idle, else odd. */ }; /* RCU's kthread states for tracing. */ @@ -121,7 +119,9 @@ struct rcu_node { /* elements that need to drain to allow the */ /* current expedited grace period to */ /* complete (only for TREE_PREEMPT_RCU). */ - unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */ + atomic_t wakemask; /* CPUs whose kthread needs to be awakened. 
*/ + /* Since this has meaning only for leaf */ + /* rcu_node structures, 32 bits suffices. */ unsigned long qsmaskinit; /* Per-GP initial value for qsmask & expmask. */ unsigned long grpmask; /* Mask to apply to parent qsmask. */ @@ -159,9 +159,6 @@ struct rcu_node { struct task_struct *boost_kthread_task; /* kthread that takes care of priority */ /* boosting for this rcu_node structure. */ - wait_queue_head_t boost_wq; - /* Wait queue on which to park the boost */ - /* kthread. */ unsigned int boost_kthread_status; /* State of boost_kthread_task for tracing. */ unsigned long n_tasks_boosted; @@ -188,9 +185,6 @@ struct rcu_node { /* kthread that takes care of this rcu_node */ /* structure, for example, awakening the */ /* per-CPU kthreads as needed. */ - wait_queue_head_t node_wq; - /* Wait queue on which to park the per-node */ - /* kthread. */ unsigned int node_kthread_status; /* State of node_kthread_task for tracing. */ } ____cacheline_internodealigned_in_smp; @@ -284,7 +278,6 @@ struct rcu_data { /* 3) dynticks interface. */ struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ int dynticks_snap; /* Per-GP tracking for dynticks. */ - int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ #endif /* #ifdef CONFIG_NO_HZ */ /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ @@ -337,6 +330,16 @@ struct rcu_data { /* scheduling clock irq */ /* before ratting on them. */ +#define rcu_wait(cond) \ +do { \ + for (;;) { \ + set_current_state(TASK_INTERRUPTIBLE); \ + if (cond) \ + break; \ + schedule(); \ + } \ + __set_current_state(TASK_RUNNING); \ +} while (0) /* * RCU global state, including node hierarchy. 
This hierarchy is @@ -446,7 +449,6 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu); static void rcu_preempt_send_cbs_to_online(void); static void __init __rcu_init_preempt(void); static void rcu_needs_cpu_flush(void); -static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, cpumask_var_t cm); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3f6559a5f5c..c8bff3099a8 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1196,8 +1196,7 @@ static int rcu_boost_kthread(void *arg) for (;;) { rnp->boost_kthread_status = RCU_KTHREAD_WAITING; - wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks || - rnp->exp_tasks); + rcu_wait(rnp->boost_tasks || rnp->exp_tasks); rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; more2boost = rcu_boost(rnp); if (more2boost) @@ -1275,14 +1274,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) } /* - * Initialize the RCU-boost waitqueue. - */ -static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp) -{ - init_waitqueue_head(&rnp->boost_wq); -} - -/* * Create an RCU-boost kthread for the specified node if one does not * already exist. We only create this kthread for preemptible RCU. * Returns zero if all is well, a negated errno otherwise. 
@@ -1306,12 +1297,17 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, raw_spin_lock_irqsave(&rnp->lock, flags); rnp->boost_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); - wake_up_process(t); sp.sched_priority = RCU_KTHREAD_PRIO; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); return 0; } +static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) +{ + if (rnp->boost_kthread_task) + wake_up_process(rnp->boost_kthread_task); +} + #else /* #ifdef CONFIG_RCU_BOOST */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) @@ -1328,10 +1324,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) { } -static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp) -{ -} - static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp, int rnp_index) @@ -1339,6 +1331,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } +static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) +{ +} + #endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifndef CONFIG_SMP @@ -1520,7 +1516,6 @@ int rcu_needs_cpu(int cpu) { int c = 0; int snap; - int snap_nmi; int thatcpu; /* Check for being in the holdoff period. */ @@ -1531,10 +1526,10 @@ int rcu_needs_cpu(int cpu) for_each_online_cpu(thatcpu) { if (thatcpu == cpu) continue; - snap = per_cpu(rcu_dynticks, thatcpu).dynticks; - snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi; + snap = atomic_add_return(0, &per_cpu(rcu_dynticks, + thatcpu).dynticks); smp_mb(); /* Order sampling of snap with end of grace period. 
*/ - if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) { + if ((snap & 0x1) != 0) { per_cpu(rcu_dyntick_drain, cpu) = 0; per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; return rcu_needs_cpu_quick_check(cpu); diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index aa0fd72b4bc..9678cc3650f 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c @@ -69,10 +69,10 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) rdp->passed_quiesc, rdp->passed_quiesc_completed, rdp->qs_pending); #ifdef CONFIG_NO_HZ - seq_printf(m, " dt=%d/%d dn=%d df=%lu", - rdp->dynticks->dynticks, + seq_printf(m, " dt=%d/%d/%d df=%lu", + atomic_read(&rdp->dynticks->dynticks), rdp->dynticks->dynticks_nesting, - rdp->dynticks->dynticks_nmi, + rdp->dynticks->dynticks_nmi_nesting, rdp->dynticks_fqs); #endif /* #ifdef CONFIG_NO_HZ */ seq_printf(m, " of=%lu ri=%lu", rdp->offline_fqs, rdp->resched_ipi); @@ -141,9 +141,9 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp) rdp->qs_pending); #ifdef CONFIG_NO_HZ seq_printf(m, ",%d,%d,%d,%lu", - rdp->dynticks->dynticks, + atomic_read(&rdp->dynticks->dynticks), rdp->dynticks->dynticks_nesting, - rdp->dynticks->dynticks_nmi, + rdp->dynticks->dynticks_nmi_nesting, rdp->dynticks_fqs); #endif /* #ifdef CONFIG_NO_HZ */ seq_printf(m, ",%lu,%lu", rdp->offline_fqs, rdp->resched_ipi); @@ -167,7 +167,7 @@ static int show_rcudata_csv(struct seq_file *m, void *unused) { seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); #ifdef CONFIG_NO_HZ - seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); + seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); #endif /* #ifdef CONFIG_NO_HZ */ seq_puts(m, "\"of\",\"ri\",\"ql\",\"b\",\"ci\",\"co\",\"ca\"\n"); #ifdef CONFIG_TREE_PREEMPT_RCU diff --git a/kernel/sched.c b/kernel/sched.c index 5e43e9dc65d..cbb3a0eee58 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2573,7 +2573,26 @@ static void ttwu_queue_remote(struct task_struct 
*p, int cpu) if (!next) smp_send_reschedule(cpu); } -#endif + +#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW +static int ttwu_activate_remote(struct task_struct *p, int wake_flags) +{ + struct rq *rq; + int ret = 0; + + rq = __task_rq_lock(p); + if (p->on_cpu) { + ttwu_activate(rq, p, ENQUEUE_WAKEUP); + ttwu_do_wakeup(rq, p, wake_flags); + ret = 1; + } + __task_rq_unlock(rq); + + return ret; + +} +#endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */ +#endif /* CONFIG_SMP */ static void ttwu_queue(struct task_struct *p, int cpu) { @@ -2631,17 +2650,17 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) while (p->on_cpu) { #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW /* - * If called from interrupt context we could have landed in the - * middle of schedule(), in this case we should take care not - * to spin on ->on_cpu if p is current, since that would - * deadlock. + * In case the architecture enables interrupts in + * context_switch(), we cannot busy wait, since that + * would lead to deadlocks when an interrupt hits and + * tries to wake up @prev. So bail and do a complete + * remote wakeup. */ - if (p == current) { - ttwu_queue(p, cpu); + if (ttwu_activate_remote(p, wake_flags)) goto stat; - } -#endif +#else cpu_relax(); +#endif } /* * Pairs with the smp_wmb() in finish_lock_switch(). 
@@ -5841,7 +5860,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); - cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); + do_set_cpus_allowed(idle, cpumask_of(cpu)); /* * We're having a chicken and egg problem, even though we are * holding rq->lock, the cpu isn't yet set to this cpu so the @@ -5929,6 +5948,16 @@ static inline void sched_init_granularity(void) } #ifdef CONFIG_SMP +void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) +{ + if (p->sched_class && p->sched_class->set_cpus_allowed) + p->sched_class->set_cpus_allowed(p, new_mask); + else { + cpumask_copy(&p->cpus_allowed, new_mask); + p->rt.nr_cpus_allowed = cpumask_weight(new_mask); + } +} + /* * This is how migration works: * @@ -5974,12 +6003,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) goto out; } - if (p->sched_class->set_cpus_allowed) - p->sched_class->set_cpus_allowed(p, new_mask); - else { - cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = cpumask_weight(new_mask); - } + do_set_cpus_allowed(p, new_mask); /* Can the task run on the task's current CPU? 
If so, we're done */ if (cpumask_test_cpu(task_cpu(p), new_mask)) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e32a9b70ee9..433491c2dc8 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1076,8 +1076,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) se->on_rq = 0; update_cfs_load(cfs_rq, 0); account_entity_dequeue(cfs_rq, se); - update_min_vruntime(cfs_rq); - update_cfs_shares(cfs_rq); /* * Normalize the entity after updating the min_vruntime because the @@ -1086,6 +1084,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) */ if (!(flags & DEQUEUE_SLEEP)) se->vruntime -= cfs_rq->min_vruntime; + + update_min_vruntime(cfs_rq); + update_cfs_shares(cfs_rq); } /* diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 64b2a37c07d..88725c939e0 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -1263,6 +1263,7 @@ static int find_lowest_rq(struct task_struct *task) if (!cpumask_test_cpu(this_cpu, lowest_mask)) this_cpu = -1; /* Skip this_cpu opt if not among lowest */ + rcu_read_lock(); for_each_domain(cpu, sd) { if (sd->flags & SD_WAKE_AFFINE) { int best_cpu; @@ -1272,15 +1273,20 @@ static int find_lowest_rq(struct task_struct *task) * remote processor. 
*/ if (this_cpu != -1 && - cpumask_test_cpu(this_cpu, sched_domain_span(sd))) + cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { + rcu_read_unlock(); return this_cpu; + } best_cpu = cpumask_first_and(lowest_mask, sched_domain_span(sd)); - if (best_cpu < nr_cpu_ids) + if (best_cpu < nr_cpu_ids) { + rcu_read_unlock(); return best_cpu; + } } } + rcu_read_unlock(); /* * And finally, if there were no matches within the domains diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h index 48ddf431db0..331e01bcd02 100644 --- a/kernel/sched_stats.h +++ b/kernel/sched_stats.h @@ -37,7 +37,7 @@ static int show_schedstat(struct seq_file *seq, void *v) #ifdef CONFIG_SMP /* domain-specific stats */ - preempt_disable(); + rcu_read_lock(); for_each_domain(cpu, sd) { enum cpu_idle_type itype; @@ -64,7 +64,7 @@ static int show_schedstat(struct seq_file *seq, void *v) sd->ttwu_wake_remote, sd->ttwu_move_affine, sd->ttwu_move_balance); } - preempt_enable(); + rcu_read_unlock(); #endif } kfree(mask_str); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d017c2c82c4..1ee417fcbfa 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -109,12 +109,18 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip); static void ftrace_global_list_func(unsigned long ip, unsigned long parent_ip) { - struct ftrace_ops *op = rcu_dereference_raw(ftrace_global_list); /*see above*/ + struct ftrace_ops *op; + + if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT))) + return; + trace_recursion_set(TRACE_GLOBAL_BIT); + op = rcu_dereference_raw(ftrace_global_list); /*see above*/ while (op != &ftrace_list_end) { op->func(ip, parent_ip); op = rcu_dereference_raw(op->next); /*see above*/ }; + trace_recursion_clear(TRACE_GLOBAL_BIT); } static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip) @@ -1638,12 +1644,12 @@ static void ftrace_startup_enable(int command) ftrace_run_update_code(command); } -static void ftrace_startup(struct ftrace_ops *ops, int 
command) +static int ftrace_startup(struct ftrace_ops *ops, int command) { bool hash_enable = true; if (unlikely(ftrace_disabled)) - return; + return -ENODEV; ftrace_start_up++; command |= FTRACE_ENABLE_CALLS; @@ -1662,6 +1668,8 @@ static void ftrace_startup(struct ftrace_ops *ops, int command) ftrace_hash_rec_enable(ops, 1); ftrace_startup_enable(command); + + return 0; } static void ftrace_shutdown(struct ftrace_ops *ops, int command) @@ -2501,7 +2509,7 @@ static void __enable_ftrace_function_probe(void) ret = __register_ftrace_function(&trace_probe_ops); if (!ret) - ftrace_startup(&trace_probe_ops, 0); + ret = ftrace_startup(&trace_probe_ops, 0); ftrace_probe_registered = 1; } @@ -3466,7 +3474,11 @@ device_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } /* Keep as macros so we do not need to define the commands */ -# define ftrace_startup(ops, command) do { } while (0) +# define ftrace_startup(ops, command) \ + ({ \ + (ops)->flags |= FTRACE_OPS_FL_ENABLED; \ + 0; \ + }) # define ftrace_shutdown(ops, command) do { } while (0) # define ftrace_startup_sysctl() do { } while (0) # define ftrace_shutdown_sysctl() do { } while (0) @@ -3484,6 +3496,10 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) { struct ftrace_ops *op; + if (unlikely(trace_recursion_test(TRACE_INTERNAL_BIT))) + return; + + trace_recursion_set(TRACE_INTERNAL_BIT); /* * Some of the ops may be dynamically allocated, * they must be freed after a synchronize_sched(). 
@@ -3496,6 +3512,7 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip) op = rcu_dereference_raw(op->next); }; preempt_enable_notrace(); + trace_recursion_clear(TRACE_INTERNAL_BIT); } static void clear_ftrace_swapper(void) @@ -3799,7 +3816,7 @@ int register_ftrace_function(struct ftrace_ops *ops) ret = __register_ftrace_function(ops); if (!ret) - ftrace_startup(ops, 0); + ret = ftrace_startup(ops, 0); out_unlock: @@ -4045,7 +4062,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc, ftrace_graph_return = retfunc; ftrace_graph_entry = entryfunc; - ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); + ret = ftrace_startup(&global_ops, FTRACE_START_FUNC_RET); out: mutex_unlock(&ftrace_lock); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 0ef7b4b2a1f..b0c7aa40794 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2216,7 +2216,7 @@ static noinline void trace_recursive_fail(void) printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:" "HC[%lu]:SC[%lu]:NMI[%lu]\n", - current->trace_recursion, + trace_recursion_buffer(), hardirq_count() >> HARDIRQ_SHIFT, softirq_count() >> SOFTIRQ_SHIFT, in_nmi()); @@ -2226,9 +2226,9 @@ static noinline void trace_recursive_fail(void) static inline int trace_recursive_lock(void) { - current->trace_recursion++; + trace_recursion_inc(); - if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH)) + if (likely(trace_recursion_buffer() < TRACE_RECURSIVE_DEPTH)) return 0; trace_recursive_fail(); @@ -2238,9 +2238,9 @@ static inline int trace_recursive_lock(void) static inline void trace_recursive_unlock(void) { - WARN_ON_ONCE(!current->trace_recursion); + WARN_ON_ONCE(!trace_recursion_buffer()); - current->trace_recursion--; + trace_recursion_dec(); } #else diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6b69c4bd306..229f8591f61 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -784,4 +784,19 @@ extern const char 
*__stop___trace_bprintk_fmt[]; FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print)) #include "trace_entries.h" +/* Only current can touch trace_recursion */ +#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0) +#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0) + +/* Ring buffer has the 10 LSB bits to count */ +#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff) + +/* for function tracing recursion */ +#define TRACE_INTERNAL_BIT (1<<11) +#define TRACE_GLOBAL_BIT (1<<12) + +#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0) +#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0) +#define trace_recursion_test(bit) ((current)->trace_recursion & (bit)) + #endif /* _LINUX_KERNEL_TRACE_H */ diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 2fe11034135..686ec399f2a 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -1657,7 +1657,12 @@ static struct ftrace_ops trace_ops __initdata = static __init void event_trace_self_test_with_function(void) { - register_ftrace_function(&trace_ops); + int ret; + ret = register_ftrace_function(&trace_ops); + if (WARN_ON(ret < 0)) { + pr_info("Failed to enable function tracer for event tests\n"); + return; + } pr_info("Running tests again, along with the function tracer\n"); event_trace_self_tests(); unregister_ftrace_function(&trace_ops); diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index cf535ccedc8..e37de492a9e 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -353,6 +353,33 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val, } EXPORT_SYMBOL(ftrace_print_symbols_seq); +#if BITS_PER_LONG == 32 +const char * +ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val, + const struct trace_print_flags_u64 *symbol_array) +{ + int i; + const char *ret = 
p->buffer + p->len; + + for (i = 0; symbol_array[i].name; i++) { + + if (val != symbol_array[i].mask) + continue; + + trace_seq_puts(p, symbol_array[i].name); + break; + } + + if (!p->len) + trace_seq_printf(p, "0x%llx", val); + + trace_seq_putc(p, 0); + + return ret; +} +EXPORT_SYMBOL(ftrace_print_symbols_seq_u64); +#endif + const char * ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len) { diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7daa4b072e9..3d0c56ad479 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -415,15 +415,13 @@ static void watchdog_nmi_disable(int cpu) { return; } #endif /* CONFIG_HARDLOCKUP_DETECTOR */ /* prepare/enable/disable routines */ -static int watchdog_prepare_cpu(int cpu) +static void watchdog_prepare_cpu(int cpu) { struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu); WARN_ON(per_cpu(softlockup_watchdog, cpu)); hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = watchdog_timer_fn; - - return 0; } static int watchdog_enable(int cpu) @@ -542,17 +540,16 @@ static int __cpuinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { int hotcpu = (unsigned long)hcpu; - int err = 0; switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - err = watchdog_prepare_cpu(hotcpu); + watchdog_prepare_cpu(hotcpu); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: if (watchdog_enabled) - err = watchdog_enable(hotcpu); + watchdog_enable(hotcpu); break; #ifdef CONFIG_HOTPLUG_CPU case CPU_UP_CANCELED: diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 28afa4c5333..dd373c8ee94 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -697,7 +697,7 @@ config DEBUG_BUGVERBOSE bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EXPERT depends on BUG depends on ARM || AVR32 || M32R || M68K || SPARC32 || SPARC64 || \ - FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 + FRV || SUPERH || GENERIC_BUG || BLACKFIN || MN10300 || TILE default 
y help Say Y here to make BUG() panics output the file name and line number diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 619313ed6c4..507a22fab73 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -144,7 +144,7 @@ static void init_shared_classes(void) #define HARDIRQ_ENTER() \ local_irq_disable(); \ - irq_enter(); \ + __irq_enter(); \ WARN_ON(!in_irq()); #define HARDIRQ_EXIT() \ diff --git a/mm/filemap.c b/mm/filemap.c index bcdc393b658..d7b10578a64 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1982,16 +1982,26 @@ static int __remove_suid(struct dentry *dentry, int kill) int file_remove_suid(struct file *file) { struct dentry *dentry = file->f_path.dentry; - int killsuid = should_remove_suid(dentry); - int killpriv = security_inode_need_killpriv(dentry); + struct inode *inode = dentry->d_inode; + int killsuid; + int killpriv; int error = 0; + /* Fast path for nothing security related */ + if (IS_NOSEC(inode)) + return 0; + + killsuid = should_remove_suid(dentry); + killpriv = security_inode_need_killpriv(dentry); + if (killpriv < 0) return killpriv; if (killpriv) error = security_inode_killpriv(dentry); if (!error && killsuid) error = __remove_suid(dentry, killsuid); + if (!error) + inode->i_flags |= S_NOSEC; return error; } @@ -2327,7 +2337,7 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping, repeat: page = find_lock_page(mapping, index); if (page) - return page; + goto found; page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~gfp_notmask); if (!page) @@ -2340,6 +2350,8 @@ repeat: goto repeat; return NULL; } +found: + wait_on_page_writeback(page); return page; } EXPORT_SYMBOL(grab_cache_page_write_begin); diff --git a/mm/maccess.c b/mm/maccess.c index e2b6f5634e0..4cee182ab5f 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -15,10 +15,10 @@ * happens, handle that and return -EFAULT. 
*/ -long __weak probe_kernel_read(void *dst, void *src, size_t size) +long __weak probe_kernel_read(void *dst, const void *src, size_t size) __attribute__((alias("__probe_kernel_read"))); -long __probe_kernel_read(void *dst, void *src, size_t size) +long __probe_kernel_read(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); @@ -43,10 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long __weak probe_kernel_write(void *dst, void *src, size_t size) +long __weak probe_kernel_write(void *dst, const void *src, size_t size) __attribute__((alias("__probe_kernel_write"))); -long __probe_kernel_write(void *dst, void *src, size_t size) +long __probe_kernel_write(void *dst, const void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a4e1db3f198..4e8985acdab 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2247,10 +2247,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (should_fail_alloc_page(gfp_mask, order)) return NULL; -#ifndef CONFIG_ZONE_DMA - if (WARN_ON_ONCE(gfp_mask & __GFP_DMA)) - return NULL; -#endif /* * Check the zones suitable for the gfp_mask contain at least one diff --git a/mm/rmap.c b/mm/rmap.c index 3a39b518a65..0eb463ea88d 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -352,6 +352,11 @@ void __init anon_vma_init(void) * The page might have been remapped to a different anon_vma or the anon_vma * returned may already be freed (and even reused). * + * In case it was remapped to a different anon_vma, the new anon_vma will be a + * child of the old anon_vma, and the anon_vma lifetime rules will therefore + * ensure that any anon_vma obtained from the page will still be valid for as + * long as we observe page_mapped() [ hence all those page_mapped() tests ]. 
+ * * All users of this function must be very careful when walking the anon_vma * chain and verify that the page in question is indeed mapped in it * [ something equivalent to page_mapped_in_vma() ]. @@ -405,6 +410,7 @@ out: struct anon_vma *page_lock_anon_vma(struct page *page) { struct anon_vma *anon_vma = NULL; + struct anon_vma *root_anon_vma; unsigned long anon_mapping; rcu_read_lock(); @@ -415,13 +421,15 @@ struct anon_vma *page_lock_anon_vma(struct page *page) goto out; anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); - if (mutex_trylock(&anon_vma->root->mutex)) { + root_anon_vma = ACCESS_ONCE(anon_vma->root); + if (mutex_trylock(&root_anon_vma->mutex)) { /* - * If we observe a !0 refcount, then holding the lock ensures - * the anon_vma will not go away, see __put_anon_vma(). + * If the page is still mapped, then this anon_vma is still + * its anon_vma, and holding the mutex ensures that it will + * not go away, see anon_vma_free(). */ - if (!atomic_read(&anon_vma->refcount)) { - anon_vma_unlock(anon_vma); + if (!page_mapped(page)) { + mutex_unlock(&root_anon_vma->mutex); anon_vma = NULL; } goto out; @@ -1014,7 +1022,7 @@ void do_page_add_anon_rmap(struct page *page, return; VM_BUG_ON(!PageLocked(page)); - VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); + /* address might be in next vma when migration races vma_adjust */ if (first) __page_set_anon_rmap(page, vma, address, exclusive); else @@ -1709,7 +1717,7 @@ void hugepage_add_anon_rmap(struct page *page, BUG_ON(!PageLocked(page)); BUG_ON(!anon_vma); - BUG_ON(address < vma->vm_start || address >= vma->vm_end); + /* address might be in next vma when migration races vma_adjust */ first = atomic_inc_and_test(&page->_mapcount); if (first) __hugepage_set_anon_rmap(page, vma, address, 0); diff --git a/mm/shmem.c b/mm/shmem.c index 1acfb2687bf..d221a1cfd7b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1114,8 +1114,8 @@ static int shmem_writepage(struct page *page, struct 
writeback_control *wbc) delete_from_page_cache(page); shmem_swp_set(info, entry, swap.val); shmem_swp_unmap(entry); - spin_unlock(&info->lock); swap_shmem_alloc(swap); + spin_unlock(&info->lock); BUG_ON(page_mapped(page)); swap_writepage(page, wbc); return 0; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8d83f9d4871..b84d7395535 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -13,10 +13,6 @@ * and need to be refreshed, or when a packet was damaged in transit. * This may be have to be moved to the VFS layer. * - * NB: BSD uses a more intelligent approach to guessing when a request - * or reply has been lost by keeping the RTO estimate for each procedure. - * We currently make do with a constant timeout value. - * * Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com> * Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de> */ @@ -32,7 +28,9 @@ #include <linux/slab.h> #include <linux/utsname.h> #include <linux/workqueue.h> +#include <linux/in.h> #include <linux/in6.h> +#include <linux/un.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/rpc_pipe_fs.h> @@ -298,22 +296,27 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) * up a string representation of the passed-in address. 
*/ if (args->servername == NULL) { + struct sockaddr_un *sun = + (struct sockaddr_un *)args->address; + struct sockaddr_in *sin = + (struct sockaddr_in *)args->address; + struct sockaddr_in6 *sin6 = + (struct sockaddr_in6 *)args->address; + servername[0] = '\0'; switch (args->address->sa_family) { - case AF_INET: { - struct sockaddr_in *sin = - (struct sockaddr_in *)args->address; + case AF_LOCAL: + snprintf(servername, sizeof(servername), "%s", + sun->sun_path); + break; + case AF_INET: snprintf(servername, sizeof(servername), "%pI4", &sin->sin_addr.s_addr); break; - } - case AF_INET6: { - struct sockaddr_in6 *sin = - (struct sockaddr_in6 *)args->address; + case AF_INET6: snprintf(servername, sizeof(servername), "%pI6", - &sin->sin6_addr); + &sin6->sin6_addr); break; - } default: /* caller wants default server name, but * address family isn't recognized. */ diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index c652e4cc9fe..9a80a922c52 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/socket.h> +#include <linux/un.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/kernel.h> @@ -32,6 +33,8 @@ # define RPCDBG_FACILITY RPCDBG_BIND #endif +#define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock" + #define RPCBIND_PROGRAM (100000u) #define RPCBIND_PORT (111u) @@ -158,20 +161,69 @@ static void rpcb_map_release(void *data) kfree(map); } -static const struct sockaddr_in rpcb_inaddr_loopback = { - .sin_family = AF_INET, - .sin_addr.s_addr = htonl(INADDR_LOOPBACK), - .sin_port = htons(RPCBIND_PORT), -}; +/* + * Returns zero on success, otherwise a negative errno value + * is returned. 
+ */ +static int rpcb_create_local_unix(void) +{ + static const struct sockaddr_un rpcb_localaddr_rpcbind = { + .sun_family = AF_LOCAL, + .sun_path = RPCBIND_SOCK_PATHNAME, + }; + struct rpc_create_args args = { + .net = &init_net, + .protocol = XPRT_TRANSPORT_LOCAL, + .address = (struct sockaddr *)&rpcb_localaddr_rpcbind, + .addrsize = sizeof(rpcb_localaddr_rpcbind), + .servername = "localhost", + .program = &rpcb_program, + .version = RPCBVERS_2, + .authflavor = RPC_AUTH_NULL, + }; + struct rpc_clnt *clnt, *clnt4; + int result = 0; + + /* + * Because we requested an RPC PING at transport creation time, + * this works only if the user space portmapper is rpcbind, and + * it's listening on AF_LOCAL on the named socket. + */ + clnt = rpc_create(&args); + if (IS_ERR(clnt)) { + dprintk("RPC: failed to create AF_LOCAL rpcbind " + "client (errno %ld).\n", PTR_ERR(clnt)); + result = -PTR_ERR(clnt); + goto out; + } + + clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4); + if (IS_ERR(clnt4)) { + dprintk("RPC: failed to bind second program to " + "rpcbind v4 client (errno %ld).\n", + PTR_ERR(clnt4)); + clnt4 = NULL; + } + + /* Protected by rpcb_create_local_mutex */ + rpcb_local_clnt = clnt; + rpcb_local_clnt4 = clnt4; -static DEFINE_MUTEX(rpcb_create_local_mutex); +out: + return result; +} /* * Returns zero on success, otherwise a negative errno value * is returned. 
*/ -static int rpcb_create_local(void) +static int rpcb_create_local_net(void) { + static const struct sockaddr_in rpcb_inaddr_loopback = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_LOOPBACK), + .sin_port = htons(RPCBIND_PORT), + }; struct rpc_create_args args = { .net = &init_net, .protocol = XPRT_TRANSPORT_TCP, @@ -186,13 +238,6 @@ static int rpcb_create_local(void) struct rpc_clnt *clnt, *clnt4; int result = 0; - if (rpcb_local_clnt) - return result; - - mutex_lock(&rpcb_create_local_mutex); - if (rpcb_local_clnt) - goto out; - clnt = rpc_create(&args); if (IS_ERR(clnt)) { dprintk("RPC: failed to create local rpcbind " @@ -214,10 +259,34 @@ static int rpcb_create_local(void) clnt4 = NULL; } + /* Protected by rpcb_create_local_mutex */ rpcb_local_clnt = clnt; rpcb_local_clnt4 = clnt4; out: + return result; +} + +/* + * Returns zero on success, otherwise a negative errno value + * is returned. + */ +static int rpcb_create_local(void) +{ + static DEFINE_MUTEX(rpcb_create_local_mutex); + int result = 0; + + if (rpcb_local_clnt) + return result; + + mutex_lock(&rpcb_create_local_mutex); + if (rpcb_local_clnt) + goto out; + + if (rpcb_create_local_unix() != 0) + result = rpcb_create_local_net(); + +out: mutex_unlock(&rpcb_create_local_mutex); return result; } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 08e05a8ce02..2b90292e950 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -942,6 +942,8 @@ static void svc_unregister(const struct svc_serv *serv) if (progp->pg_vers[i]->vs_hidden) continue; + dprintk("svc: attempting to unregister %sv%u\n", + progp->pg_name, i); __svc_unregister(progp->pg_prog, i, progp->pg_name); } } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index b7d435c3f19..af04f779ce9 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -387,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, return len; } +static int svc_partial_recvfrom(struct svc_rqst *rqstp, + 
struct kvec *iov, int nr, + int buflen, unsigned int base) +{ + size_t save_iovlen; + void __user *save_iovbase; + unsigned int i; + int ret; + + if (base == 0) + return svc_recvfrom(rqstp, iov, nr, buflen); + + for (i = 0; i < nr; i++) { + if (iov[i].iov_len > base) + break; + base -= iov[i].iov_len; + } + save_iovlen = iov[i].iov_len; + save_iovbase = iov[i].iov_base; + iov[i].iov_len -= base; + iov[i].iov_base += base; + ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen); + iov[i].iov_len = save_iovlen; + iov[i].iov_base = save_iovbase; + return ret; +} + /* * Set socket snd and rcv buffer lengths */ @@ -409,7 +436,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd, lock_sock(sock->sk); sock->sk->sk_sndbuf = snd * 2; sock->sk->sk_rcvbuf = rcv * 2; - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK; sock->sk->sk_write_space(sock->sk); release_sock(sock->sk); #endif @@ -884,6 +910,56 @@ failed: return NULL; } +static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return 0; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + if (rqstp->rq_pages[i] != NULL) + put_page(rqstp->rq_pages[i]); + BUG_ON(svsk->sk_pages[i] == NULL); + rqstp->rq_pages[i] = svsk->sk_pages[i]; + svsk->sk_pages[i] = NULL; + } + rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]); + return len; +} + +static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ + unsigned int i, len, npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + return; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + svsk->sk_pages[i] = rqstp->rq_pages[i]; + rqstp->rq_pages[i] = NULL; + } +} + +static void svc_tcp_clear_pages(struct svc_sock *svsk) +{ + unsigned int i, len, 
npages; + + if (svsk->sk_tcplen <= sizeof(rpc_fraghdr)) + goto out; + len = svsk->sk_tcplen - sizeof(rpc_fraghdr); + npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + BUG_ON(svsk->sk_pages[i] == NULL); + put_page(svsk->sk_pages[i]); + svsk->sk_pages[i] = NULL; + } +out: + svsk->sk_tcplen = 0; +} + /* * Receive data. * If we haven't gotten the record length yet, get the next four bytes. @@ -893,31 +969,15 @@ failed: static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) { struct svc_serv *serv = svsk->sk_xprt.xpt_server; + unsigned int want; int len; - if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) - /* sndbuf needs to have room for one request - * per thread, otherwise we can stall even when the - * network isn't a bottleneck. - * - * We count all threads rather than threads in a - * particular pool, which provides an upper bound - * on the number of threads which will access the socket. - * - * rcvbuf just needs to be able to hold a few requests. - * Normally they will be removed from the queue - * as soon a a complete request arrives. 
- */ - svc_sock_setbufsize(svsk->sk_sock, - (serv->sv_nrthreads+3) * serv->sv_max_mesg, - 3 * serv->sv_max_mesg); - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { - int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; struct kvec iov; + want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; iov.iov_len = want; if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) @@ -927,7 +987,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) if (len < want) { dprintk("svc: short recvfrom while reading record " "length (%d of %d)\n", len, want); - goto err_again; /* record header not complete */ + return -EAGAIN; } svsk->sk_reclen = ntohl(svsk->sk_reclen); @@ -954,83 +1014,75 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) } } - /* Check whether enough data is available */ - len = svc_recv_available(svsk); - if (len < 0) - goto error; + if (svsk->sk_reclen < 8) + goto err_delete; /* client is nuts. 
*/ - if (len < svsk->sk_reclen) { - dprintk("svc: incomplete TCP record (%d of %d)\n", - len, svsk->sk_reclen); - goto err_again; /* record not complete */ - } len = svsk->sk_reclen; - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); return len; - error: - if (len == -EAGAIN) - dprintk("RPC: TCP recv_record got EAGAIN\n"); +error: + dprintk("RPC: TCP recv_record got %d\n", len); return len; - err_delete: +err_delete: set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - err_again: return -EAGAIN; } -static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, - struct rpc_rqst **reqpp, struct kvec *vec) +static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) { + struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; struct rpc_rqst *req = NULL; - u32 *p; - u32 xid; - u32 calldir; - int len; - - len = svc_recvfrom(rqstp, vec, 1, 8); - if (len < 0) - goto error; + struct kvec *src, *dst; + __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base; + __be32 xid; + __be32 calldir; - p = (u32 *)rqstp->rq_arg.head[0].iov_base; xid = *p++; calldir = *p; - if (calldir == 0) { - /* REQUEST is the most common case */ - vec[0] = rqstp->rq_arg.head[0]; - } else { - /* REPLY */ - struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt; - - if (bc_xprt) - req = xprt_lookup_rqst(bc_xprt, xid); - - if (!req) { - printk(KERN_NOTICE - "%s: Got unrecognized reply: " - "calldir 0x%x xpt_bc_xprt %p xid %08x\n", - __func__, ntohl(calldir), - bc_xprt, xid); - vec[0] = rqstp->rq_arg.head[0]; - goto out; - } + if (bc_xprt) + req = xprt_lookup_rqst(bc_xprt, xid); - memcpy(&req->rq_private_buf, &req->rq_rcv_buf, - sizeof(struct xdr_buf)); - /* copy the xid and call direction */ - memcpy(req->rq_private_buf.head[0].iov_base, - rqstp->rq_arg.head[0].iov_base, 8); - vec[0] = req->rq_private_buf.head[0]; + if (!req) { + printk(KERN_NOTICE + "%s: Got unrecognized reply: " + "calldir 0x%x xpt_bc_xprt %p xid %08x\n", + __func__, ntohl(calldir), + bc_xprt, xid); + return -EAGAIN; } 
- out: - vec[0].iov_base += 8; - vec[0].iov_len -= 8; - len = svsk->sk_reclen - 8; - error: - *reqpp = req; - return len; + + memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); + /* + * XXX!: cheating for now! Only copying HEAD. + * But we know this is good enough for now (in fact, for any + * callback reply in the foreseeable future). + */ + dst = &req->rq_private_buf.head[0]; + src = &rqstp->rq_arg.head[0]; + if (dst->iov_len < src->iov_len) + return -EAGAIN; /* whatever; just giving up. */ + memcpy(dst->iov_base, src->iov_base, src->iov_len); + xprt_complete_rqst(req->rq_task, svsk->sk_reclen); + rqstp->rq_arg.len = 0; + return 0; } +static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) +{ + int i = 0; + int t = 0; + + while (t < len) { + vec[i].iov_base = page_address(pages[i]); + vec[i].iov_len = PAGE_SIZE; + i++; + t += PAGE_SIZE; + } + return i; +} + + /* * Receive data from a TCP socket. */ @@ -1041,8 +1093,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) struct svc_serv *serv = svsk->sk_xprt.xpt_server; int len; struct kvec *vec; - int pnum, vlen; - struct rpc_rqst *req = NULL; + unsigned int want, base; + __be32 *p; + __be32 calldir; + int pnum; dprintk("svc: tcp_recv %p data %d conn %d close %d\n", svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), @@ -1053,87 +1107,73 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) if (len < 0) goto error; + base = svc_tcp_restore_pages(svsk, rqstp); + want = svsk->sk_reclen - base; + vec = rqstp->rq_vec; - vec[0] = rqstp->rq_arg.head[0]; - vlen = PAGE_SIZE; - /* - * We have enough data for the whole tcp record. Let's try and read the - * first 8 bytes to get the xid and the call direction. We can use this - * to figure out if this is a call or a reply to a callback. If - * sk_reclen is < 8 (xid and calldir), then this is a malformed packet. - * In that case, don't bother with the calldir and just read the data. - * It will be rejected in svc_process. 
- */ - if (len >= 8) { - len = svc_process_calldir(svsk, rqstp, &req, vec); - if (len < 0) - goto err_again; - vlen -= 8; - } + pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], + svsk->sk_reclen); - pnum = 1; - while (vlen < len) { - vec[pnum].iov_base = (req) ? - page_address(req->rq_private_buf.pages[pnum - 1]) : - page_address(rqstp->rq_pages[pnum]); - vec[pnum].iov_len = PAGE_SIZE; - pnum++; - vlen += PAGE_SIZE; - } rqstp->rq_respages = &rqstp->rq_pages[pnum]; /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, len); - if (len < 0) - goto err_again; - - /* - * Account for the 8 bytes we read earlier - */ - len += 8; - - if (req) { - xprt_complete_rqst(req->rq_task, len); - len = 0; - goto out; + len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); + if (len >= 0) + svsk->sk_tcplen += len; + if (len != want) { + if (len < 0 && len != -EAGAIN) + goto err_other; + svc_tcp_save_pages(svsk, rqstp); + dprintk("svc: incomplete TCP record (%d of %d)\n", + svsk->sk_tcplen, svsk->sk_reclen); + goto err_noclose; } - dprintk("svc: TCP complete record (%d bytes)\n", len); - rqstp->rq_arg.len = len; + + rqstp->rq_arg.len = svsk->sk_reclen; rqstp->rq_arg.page_base = 0; - if (len <= rqstp->rq_arg.head[0].iov_len) { - rqstp->rq_arg.head[0].iov_len = len; + if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { + rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len; rqstp->rq_arg.page_len = 0; - } else { - rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; - } + } else + rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len; rqstp->rq_xprt_ctxt = NULL; rqstp->rq_prot = IPPROTO_TCP; -out: + p = (__be32 *)rqstp->rq_arg.head[0].iov_base; + calldir = p[1]; + if (calldir) + len = receive_cb_reply(svsk, rqstp); + /* Reset TCP read info */ svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + /* If we have more data, signal svc_xprt_enqueue() to try again */ + if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) + set_bit(XPT_DATA, 
&svsk->sk_xprt.xpt_flags); + + if (len < 0) + goto error; svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt); if (serv->sv_stats) serv->sv_stats->nettcpcnt++; - return len; + dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len); + return rqstp->rq_arg.len; -err_again: - if (len == -EAGAIN) { - dprintk("RPC: TCP recvfrom got EAGAIN\n"); - return len; - } error: - if (len != -EAGAIN) { - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_xprt.xpt_server->sv_name, -len); - set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - } + if (len != -EAGAIN) + goto err_other; + dprintk("RPC: TCP recvfrom got EAGAIN\n"); return -EAGAIN; +err_other: + printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", + svsk->sk_xprt.xpt_server->sv_name, -len); + set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_noclose: + return -EAGAIN; /* record not complete */ } /* @@ -1304,18 +1344,10 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) svsk->sk_reclen = 0; svsk->sk_tcplen = 0; + memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; - /* initialise setting must have enough space to - * receive and respond to one request. - * svc_tcp_recvfrom will re-adjust if necessary - */ - svc_sock_setbufsize(svsk->sk_sock, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg, - 3 * svsk->sk_xprt.xpt_server->sv_max_mesg); - - set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); if (sk->sk_state != TCP_ESTABLISHED) set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1379,8 +1411,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Initialize the socket */ if (sock->type == SOCK_DGRAM) svc_udp_init(svsk, serv); - else + else { + /* initialise setting must have enough space to + * receive and respond to one request. 
+ */ + svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, + 4 * serv->sv_max_mesg); svc_tcp_init(svsk, serv); + } dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); @@ -1562,8 +1600,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) svc_sock_detach(xprt); - if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) + if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + svc_tcp_clear_pages(svsk); kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); + } } /* diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 679cd674b81..f008c14ad34 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -638,6 +638,25 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p) } EXPORT_SYMBOL_GPL(xdr_init_decode); +/** + * xdr_init_decode_pages - Initialize an xdr_stream for decoding data. + * @xdr: pointer to xdr_stream struct + * @buf: pointer to XDR buffer from which to decode data + * @pages: list of pages to decode into + * @len: length in bytes of buffer in pages + */ +void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, + struct page **pages, unsigned int len) +{ + memset(buf, 0, sizeof(*buf)); + buf->pages = pages; + buf->page_len = len; + buf->buflen = len; + buf->len = len; + xdr_init_decode(xdr, buf, NULL); +} +EXPORT_SYMBOL_GPL(xdr_init_decode_pages); + static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { __be32 *p = xdr->p; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bf005d3c65e..72abb735893 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -19,6 +19,7 @@ */ #include <linux/types.h> +#include <linux/string.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/capability.h> @@ -28,6 +29,7 @@ #include <linux/in.h> #include <linux/net.h> #include <linux/mm.h> +#include <linux/un.h> #include <linux/udp.h> #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> @@ -45,6 +47,9 @@ #include <net/tcp.h> #include "sunrpc.h" + +static void 
xs_close(struct rpc_xprt *xprt); + /* * xprtsock tunables */ @@ -261,6 +266,11 @@ static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) return (struct sockaddr *) &xprt->addr; } +static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt) +{ + return (struct sockaddr_un *) &xprt->addr; +} + static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) { return (struct sockaddr_in *) &xprt->addr; @@ -276,23 +286,34 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt) struct sockaddr *sap = xs_addr(xprt); struct sockaddr_in6 *sin6; struct sockaddr_in *sin; + struct sockaddr_un *sun; char buf[128]; - (void)rpc_ntop(sap, buf, sizeof(buf)); - xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL); - switch (sap->sa_family) { + case AF_LOCAL: + sun = xs_addr_un(xprt); + strlcpy(buf, sun->sun_path, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); + break; case AF_INET: + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); sin = xs_addr_in(xprt); snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr)); break; case AF_INET6: + (void)rpc_ntop(sap, buf, sizeof(buf)); + xprt->address_strings[RPC_DISPLAY_ADDR] = + kstrdup(buf, GFP_KERNEL); sin6 = xs_addr_in6(xprt); snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr); break; default: BUG(); } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL); } @@ -495,6 +516,70 @@ static int xs_nospace(struct rpc_task *task) return ret; } +/* + * Construct a stream transport record marker in @buf. 
+ */ +static inline void xs_encode_stream_record_marker(struct xdr_buf *buf) +{ + u32 reclen = buf->len - sizeof(rpc_fraghdr); + rpc_fraghdr *base = buf->head[0].iov_base; + *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen); +} + +/** + * xs_local_send_request - write an RPC request to an AF_LOCAL socket + * @task: RPC task that manages the state of an RPC request + * + * Return values: + * 0: The request has been sent + * EAGAIN: The socket was blocked, please call again later to + * complete the request + * ENOTCONN: Caller needs to invoke connect logic then call again + * other: Some other error occurred, the request was not sent + */ +static int xs_local_send_request(struct rpc_task *task) +{ + struct rpc_rqst *req = task->tk_rqstp; + struct rpc_xprt *xprt = req->rq_xprt; + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct xdr_buf *xdr = &req->rq_snd_buf; + int status; + + xs_encode_stream_record_marker(&req->rq_snd_buf); + + xs_pktdump("packet data:", + req->rq_svec->iov_base, req->rq_svec->iov_len); + + status = xs_sendpages(transport->sock, NULL, 0, + xdr, req->rq_bytes_sent); + dprintk("RPC: %s(%u) = %d\n", + __func__, xdr->len - req->rq_bytes_sent, status); + if (likely(status >= 0)) { + req->rq_bytes_sent += status; + req->rq_xmit_bytes_sent += status; + if (likely(req->rq_bytes_sent >= req->rq_slen)) { + req->rq_bytes_sent = 0; + return 0; + } + status = -EAGAIN; + } + + switch (status) { + case -EAGAIN: + status = xs_nospace(task); + break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); + case -EPIPE: + xs_close(xprt); + status = -ENOTCONN; + } + + return status; +} + /** * xs_udp_send_request - write an RPC request to a UDP socket * @task: address of RPC task that manages the state of an RPC request @@ -574,13 +659,6 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) kernel_sock_shutdown(sock, SHUT_WR); } -static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf) -{ 
- u32 reclen = buf->len - sizeof(rpc_fraghdr); - rpc_fraghdr *base = buf->head[0].iov_base; - *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen); -} - /** * xs_tcp_send_request - write an RPC request to a TCP socket * @task: address of RPC task that manages the state of an RPC request @@ -603,7 +681,7 @@ static int xs_tcp_send_request(struct rpc_task *task) struct xdr_buf *xdr = &req->rq_snd_buf; int status; - xs_encode_tcp_record_marker(&req->rq_snd_buf); + xs_encode_stream_record_marker(&req->rq_snd_buf); xs_pktdump("packet data:", req->rq_svec->iov_base, @@ -785,6 +863,88 @@ static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) return (struct rpc_xprt *) sk->sk_user_data; } +static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) +{ + struct xdr_skb_reader desc = { + .skb = skb, + .offset = sizeof(rpc_fraghdr), + .count = skb->len - sizeof(rpc_fraghdr), + }; + + if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0) + return -1; + if (desc.count) + return -1; + return 0; +} + +/** + * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets + * @sk: socket with data to read + * @len: how much data to read + * + * Currently this assumes we can read the whole reply in a single gulp. + */ +static void xs_local_data_ready(struct sock *sk, int len) +{ + struct rpc_task *task; + struct rpc_xprt *xprt; + struct rpc_rqst *rovr; + struct sk_buff *skb; + int err, repsize, copied; + u32 _xid; + __be32 *xp; + + read_lock_bh(&sk->sk_callback_lock); + dprintk("RPC: %s...\n", __func__); + xprt = xprt_from_sock(sk); + if (xprt == NULL) + goto out; + + skb = skb_recv_datagram(sk, 0, 1, &err); + if (skb == NULL) + goto out; + + if (xprt->shutdown) + goto dropit; + + repsize = skb->len - sizeof(rpc_fraghdr); + if (repsize < 4) { + dprintk("RPC: impossible RPC reply size %d\n", repsize); + goto dropit; + } + + /* Copy the XID from the skb... 
*/ + xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid); + if (xp == NULL) + goto dropit; + + /* Look up and lock the request corresponding to the given XID */ + spin_lock(&xprt->transport_lock); + rovr = xprt_lookup_rqst(xprt, *xp); + if (!rovr) + goto out_unlock; + task = rovr->rq_task; + + copied = rovr->rq_private_buf.buflen; + if (copied > repsize) + copied = repsize; + + if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { + dprintk("RPC: sk_buff copy failed\n"); + goto out_unlock; + } + + xprt_complete_rqst(task, copied); + + out_unlock: + spin_unlock(&xprt->transport_lock); + dropit: + skb_free_datagram(sk, skb); + out: + read_unlock_bh(&sk->sk_callback_lock); +} + /** * xs_udp_data_ready - "data ready" callback for UDP sockets * @sk: socket with data to read @@ -1344,7 +1504,6 @@ static void xs_tcp_state_change(struct sock *sk) case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ xprt_force_disconnect(xprt); - case TCP_SYN_SENT: xprt->connect_cookie++; case TCP_CLOSING: /* @@ -1571,11 +1730,31 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) return err; } +/* + * We don't support autobind on AF_LOCAL sockets + */ +static void xs_local_rpcbind(struct rpc_task *task) +{ + xprt_set_bound(task->tk_xprt); +} + +static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port) +{ +} #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; +static inline void xs_reclassify_socketu(struct socket *sock) +{ + struct sock *sk = sock->sk; + + BUG_ON(sock_owned_by_user(sk)); + sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC", + &xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]); +} + static inline void xs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; @@ -1597,6 +1776,9 @@ static inline void xs_reclassify_socket6(struct socket *sock) static inline void xs_reclassify_socket(int family, struct socket *sock) 
{ switch (family) { + case AF_LOCAL: + xs_reclassify_socketu(sock); + break; case AF_INET: xs_reclassify_socket4(sock); break; @@ -1606,6 +1788,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock) } } #else +static inline void xs_reclassify_socketu(struct socket *sock) +{ +} + static inline void xs_reclassify_socket4(struct socket *sock) { } @@ -1644,6 +1830,94 @@ out: return ERR_PTR(err); } +static int xs_local_finish_connecting(struct rpc_xprt *xprt, + struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + xs_save_old_callbacks(transport, sk); + + sk->sk_user_data = xprt; + sk->sk_data_ready = xs_local_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_error_report = xs_error_report; + sk->sk_allocation = GFP_ATOMIC; + + xprt_clear_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + xprt->stat.connect_count++; + xprt->stat.connect_start = jiffies; + return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0); +} + +/** + * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint + * @xprt: RPC transport to connect + * @transport: socket transport to connect + * @create_sock: function to create a socket of the correct type + * + * Invoked by a work queue tasklet. 
+ */ +static void xs_local_setup_socket(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock; + int status = -EIO; + + if (xprt->shutdown) + goto out; + + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + status = __sock_create(xprt->xprt_net, AF_LOCAL, + SOCK_STREAM, 0, &sock, 1); + if (status < 0) { + dprintk("RPC: can't create AF_LOCAL " + "transport socket (%d).\n", -status); + goto out; + } + xs_reclassify_socketu(sock); + + dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + + status = xs_local_finish_connecting(xprt, sock); + switch (status) { + case 0: + dprintk("RPC: xprt %p connected to %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + xprt_set_connected(xprt); + break; + case -ENOENT: + dprintk("RPC: xprt %p: socket %s does not exist\n", + xprt, xprt->address_strings[RPC_DISPLAY_ADDR]); + break; + default: + printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n", + __func__, -status, + xprt->address_strings[RPC_DISPLAY_ADDR]); + } + +out: + xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); +} + static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1758,6 +2032,7 @@ static void xs_tcp_reuse_connection(struct sock_xprt *transport) static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret = -ENOTCONN; if (!transport->inet) { struct sock *sk = sock->sk; @@ -1789,12 +2064,22 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) } if (!xprt_bound(xprt)) - return -ENOTCONN; + goto out; /* Tell the socket layer to start connecting... 
*/ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; - return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); + switch (ret) { + case 0: + case -EINPROGRESS: + /* SYN_SENT! */ + xprt->connect_cookie++; + if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + } +out: + return ret; } /** @@ -1917,6 +2202,32 @@ static void xs_connect(struct rpc_task *task) } /** + * xs_local_print_stats - display AF_LOCAL socket-specifc stats + * @xprt: rpc_xprt struct containing statistics + * @seq: output file + * + */ +static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) +{ + long idle_time = 0; + + if (xprt_connected(xprt)) + idle_time = (long)(jiffies - xprt->last_used) / HZ; + + seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu " + "%llu %llu\n", + xprt->stat.bind_count, + xprt->stat.connect_count, + xprt->stat.connect_time, + idle_time, + xprt->stat.sends, + xprt->stat.recvs, + xprt->stat.bad_xids, + xprt->stat.req_u, + xprt->stat.bklog_u); +} + +/** * xs_udp_print_stats - display UDP socket-specifc stats * @xprt: rpc_xprt struct containing statistics * @seq: output file @@ -2014,10 +2325,7 @@ static int bc_sendto(struct rpc_rqst *req) unsigned long headoff; unsigned long tailoff; - /* - * Set up the rpc header and record marker stuff - */ - xs_encode_tcp_record_marker(xbufp); + xs_encode_stream_record_marker(xbufp); tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK; headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK; @@ -2089,6 +2397,21 @@ static void bc_destroy(struct rpc_xprt *xprt) { } +static struct rpc_xprt_ops xs_local_ops = { + .reserve_xprt = xprt_reserve_xprt, + .release_xprt = xs_tcp_release_xprt, + .rpcbind = xs_local_rpcbind, + .set_port = xs_local_set_port, + .connect = xs_connect, + .buf_alloc = rpc_malloc, + .buf_free = rpc_free, + .send_request = 
xs_local_send_request, + .set_retrans_timeout = xprt_set_retrans_timeout_def, + .close = xs_close, + .destroy = xs_destroy, + .print_stats = xs_local_print_stats, +}; + static struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, @@ -2150,6 +2473,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap) }; switch (family) { + case AF_LOCAL: + break; case AF_INET: memcpy(sap, &sin, sizeof(sin)); break; @@ -2197,6 +2522,70 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return xprt; } +static const struct rpc_timeout xs_local_default_timeout = { + .to_initval = 10 * HZ, + .to_maxval = 10 * HZ, + .to_retries = 2, +}; + +/** + * xs_setup_local - Set up transport to use an AF_LOCAL socket + * @args: rpc transport creation arguments + * + * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP + */ +static struct rpc_xprt *xs_setup_local(struct xprt_create *args) +{ + struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr; + struct sock_xprt *transport; + struct rpc_xprt *xprt; + struct rpc_xprt *ret; + + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + if (IS_ERR(xprt)) + return xprt; + transport = container_of(xprt, struct sock_xprt, xprt); + + xprt->prot = 0; + xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); + xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; + + xprt->bind_timeout = XS_BIND_TO; + xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; + xprt->idle_timeout = XS_IDLE_DISC_TO; + + xprt->ops = &xs_local_ops; + xprt->timeout = &xs_local_default_timeout; + + switch (sun->sun_family) { + case AF_LOCAL: + if (sun->sun_path[0] != '/') { + dprintk("RPC: bad AF_LOCAL address: %s\n", + sun->sun_path); + ret = ERR_PTR(-EINVAL); + goto out_err; + } + xprt_set_bound(xprt); + INIT_DELAYED_WORK(&transport->connect_worker, + xs_local_setup_socket); + xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); + break; + default: + ret = ERR_PTR(-EAFNOSUPPORT); + 
goto out_err; + } + + dprintk("RPC: set up xprt to %s via AF_LOCAL\n", + xprt->address_strings[RPC_DISPLAY_ADDR]); + + if (try_module_get(THIS_MODULE)) + return xprt; + ret = ERR_PTR(-EINVAL); +out_err: + xprt_free(xprt); + return ret; +} + static const struct rpc_timeout xs_udp_default_timeout = { .to_initval = 5 * HZ, .to_maxval = 30 * HZ, @@ -2438,6 +2827,14 @@ out_err: return ret; } +static struct xprt_class xs_local_transport = { + .list = LIST_HEAD_INIT(xs_local_transport.list), + .name = "named UNIX socket", + .owner = THIS_MODULE, + .ident = XPRT_TRANSPORT_LOCAL, + .setup = xs_setup_local, +}; + static struct xprt_class xs_udp_transport = { .list = LIST_HEAD_INIT(xs_udp_transport.list), .name = "udp", @@ -2473,6 +2870,7 @@ int init_socket_xprt(void) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif + xprt_register_transport(&xs_local_transport); xprt_register_transport(&xs_udp_transport); xprt_register_transport(&xs_tcp_transport); xprt_register_transport(&xs_bc_tcp_transport); @@ -2493,6 +2891,7 @@ void cleanup_socket_xprt(void) } #endif + xprt_unregister_transport(&xs_local_transport); xprt_unregister_transport(&xs_udp_transport); xprt_unregister_transport(&xs_tcp_transport); xprt_unregister_transport(&xs_bc_tcp_transport); diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 4be60364a40..f40a6af6bf4 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -43,6 +43,7 @@ #undef ELF_R_INFO #undef Elf_r_info #undef ELF_ST_BIND +#undef ELF_ST_TYPE #undef fn_ELF_R_SYM #undef fn_ELF_R_INFO #undef uint_t @@ -76,6 +77,7 @@ # define ELF_R_INFO ELF64_R_INFO # define Elf_r_info Elf64_r_info # define ELF_ST_BIND ELF64_ST_BIND +# define ELF_ST_TYPE ELF64_ST_TYPE # define fn_ELF_R_SYM fn_ELF64_R_SYM # define fn_ELF_R_INFO fn_ELF64_R_INFO # define uint_t uint64_t @@ -108,6 +110,7 @@ # define ELF_R_INFO ELF32_R_INFO # define Elf_r_info Elf32_r_info # define ELF_ST_BIND ELF32_ST_BIND +# define ELF_ST_TYPE ELF32_ST_TYPE # define 
fn_ELF_R_SYM fn_ELF32_R_SYM # define fn_ELF_R_INFO fn_ELF32_R_INFO # define uint_t uint32_t @@ -427,6 +430,11 @@ static unsigned find_secsym_ndx(unsigned const txtndx, if (txtndx == w2(symp->st_shndx) /* avoid STB_WEAK */ && (STB_LOCAL == st_bind || STB_GLOBAL == st_bind)) { + /* function symbols on ARM have quirks, avoid them */ + if (w2(ehdr->e_machine) == EM_ARM + && ELF_ST_TYPE(symp->st_info) == STT_FUNC) + continue; + *recvalp = _w(symp->st_value); return symp - sym0; } diff --git a/scripts/tags.sh b/scripts/tags.sh index bd6185d529c..75c5d24f199 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -132,7 +132,7 @@ exuberant() --regex-asm='/^ENTRY\(([^)]*)\).*/\1/' \ --regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' \ --regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1/' \ - --regex-c++='/^DEFINE_EVENT\(([^,)]*).*/trace_\1/' + --regex-c++='/^DEFINE_EVENT\([^,)]*, *([^,)]*).*/trace_\1/' all_kconfigs | xargs $1 -a \ --langdef=kconfig --language-force=kconfig \ @@ -152,7 +152,9 @@ emacs() { all_sources | xargs $1 -a \ --regex='/^ENTRY(\([^)]*\)).*/\1/' \ - --regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/' + --regex='/^SYSCALL_DEFINE[0-9]?(\([^,)]*\).*/sys_\1/' \ + --regex='/^TRACE_EVENT(\([^,)]*\).*/trace_\1/' \ + --regex='/^DEFINE_EVENT([^,)]*, *\([^,)]*\).*/trace_\1/' all_kconfigs | xargs $1 -a \ --regex='/^[ \t]*\(\(menu\)*config\)[ \t]+\([a-zA-Z0-9_]+\)/\3/' diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index ae3a698415e..ec1bcecf2cd 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -593,7 +593,8 @@ static int apparmor_setprocattr(struct task_struct *task, char *name, sa.aad.op = OP_SETPROCATTR; sa.aad.info = name; sa.aad.error = -EINVAL; - return aa_audit(AUDIT_APPARMOR_DENIED, NULL, GFP_KERNEL, + return aa_audit(AUDIT_APPARMOR_DENIED, + __aa_current_profile(), GFP_KERNEL, &sa, NULL); } } else if (strcmp(name, "exec") == 0) { diff --git a/sound/soc/codecs/cx20442.c b/sound/soc/codecs/cx20442.c index 
f8c663dcff0..d68ea532cc7 100644 --- a/sound/soc/codecs/cx20442.c +++ b/sound/soc/codecs/cx20442.c @@ -262,14 +262,14 @@ static int v253_hangup(struct tty_struct *tty) } /* Line discipline .receive_buf() */ -static unsigned int v253_receive(struct tty_struct *tty, - const unsigned char *cp, char *fp, int count) +static void v253_receive(struct tty_struct *tty, + const unsigned char *cp, char *fp, int count) { struct snd_soc_codec *codec = tty->disc_data; struct cx20442_priv *cx20442; if (!codec) - return count; + return; cx20442 = snd_soc_codec_get_drvdata(codec); @@ -281,8 +281,6 @@ static unsigned int v253_receive(struct tty_struct *tty, codec->hw_write = (hw_write_t)tty->ops->write; codec->card->pop_time = 1; } - - return count; } /* Line discipline .write_wakeup() */ diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 1455413ec7a..032ba6398a5 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -215,11 +215,13 @@ LIB_FILE=$(OUTPUT)libperf.a LIB_H += ../../include/linux/perf_event.h LIB_H += ../../include/linux/rbtree.h LIB_H += ../../include/linux/list.h +LIB_H += ../../include/linux/const.h LIB_H += ../../include/linux/hash.h LIB_H += ../../include/linux/stringify.h LIB_H += util/include/linux/bitmap.h LIB_H += util/include/linux/bitops.h LIB_H += util/include/linux/compiler.h +LIB_H += util/include/linux/const.h LIB_H += util/include/linux/ctype.h LIB_H += util/include/linux/kernel.h LIB_H += util/include/linux/list.h diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index e18eb7ed30a..7b139e1e7e8 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -8,8 +8,6 @@ #include "builtin.h" #include "util/util.h" - -#include "util/util.h" #include "util/color.h" #include <linux/list.h> #include "util/cache.h" diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0974f957b8f..8e2c8579818 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -823,6 
+823,16 @@ int cmd_record(int argc, const char **argv, const char *prefix __used) symbol__init(); + if (symbol_conf.kptr_restrict) + pr_warning( +"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" +"check /proc/sys/kernel/kptr_restrict.\n\n" +"Samples in kernel functions may not be resolved if a suitable vmlinux\n" +"file is not found in the buildid cache or in the vmlinux path.\n\n" +"Samples in kernel modules won't be resolved at all.\n\n" +"If some relocation was applied (e.g. kexec) symbols may be misresolved\n" +"even with a suitable vmlinux or kallsyms file.\n\n"); + if (no_buildid_cache || no_buildid) disable_buildid_cache(); diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 498c6f70a74..287a173523a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -116,6 +116,9 @@ static int process_sample_event(union perf_event *event, if (al.filtered || (hide_unresolved && al.sym == NULL)) return 0; + if (al.map != NULL) + al.map->dso->hit = 1; + if (perf_session__add_hist_entry(session, &al, sample, evsel)) { pr_debug("problem incrementing symbol period, skipping event\n"); return -1; @@ -249,6 +252,8 @@ static int __cmd_report(void) u64 nr_samples; struct perf_session *session; struct perf_evsel *pos; + struct map *kernel_map; + struct kmap *kernel_kmap; const char *help = "For a higher level overview, try: perf report --sort comm,dso"; signal(SIGINT, sig_handler); @@ -268,6 +273,24 @@ static int __cmd_report(void) if (ret) goto out_delete; + kernel_map = session->host_machine.vmlinux_maps[MAP__FUNCTION]; + kernel_kmap = map__kmap(kernel_map); + if (kernel_map == NULL || + (kernel_map->dso->hit && + (kernel_kmap->ref_reloc_sym == NULL || + kernel_kmap->ref_reloc_sym->addr == 0))) { + const struct dso *kdso = kernel_map->dso; + + ui__warning( +"Kernel address maps (/proc/{kallsyms,modules}) were restricted.\n\n" +"Check /proc/sys/kernel/kptr_restrict before running 'perf record'.\n\n%s\n\n" 
+"Samples in kernel modules can't be resolved as well.\n\n", + RB_EMPTY_ROOT(&kdso->symbols[MAP__FUNCTION]) ? +"As no suitable kallsyms nor vmlinux was found, kernel samples\n" +"can't be resolved." : +"If some relocation was applied (e.g. kexec) symbols may be misresolved."); + } + if (dump_trace) { perf_session__fprintf_nr_events(session, stdout); goto out_delete; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 974f6d3f4e5..22747de7234 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -10,7 +10,6 @@ #include "util/symbol.h" #include "util/thread.h" #include "util/trace-event.h" -#include "util/parse-options.h" #include "util/util.h" #include "util/evlist.h" #include "util/evsel.h" diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2d7934e9de3..f2f3f4937aa 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -62,8 +62,6 @@ #include <linux/unistd.h> #include <linux/types.h> -#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) - static struct perf_top top = { .count_filter = 5, .delay_secs = 2, @@ -82,6 +80,8 @@ static bool use_tui, use_stdio; static int default_interval = 0; +static bool kptr_restrict_warned; +static bool vmlinux_warned; static bool inherit = false; static int realtime_prio = 0; static bool group = false; @@ -740,7 +740,22 @@ static void perf_event__process_sample(const union perf_event *event, al.filtered) return; + if (!kptr_restrict_warned && + symbol_conf.kptr_restrict && + al.cpumode == PERF_RECORD_MISC_KERNEL) { + ui__warning( +"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" +"Check /proc/sys/kernel/kptr_restrict.\n\n" +"Kernel%s samples will not be resolved.\n", + !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? 
+ " modules" : ""); + if (use_browser <= 0) + sleep(5); + kptr_restrict_warned = true; + } + if (al.sym == NULL) { + const char *msg = "Kernel samples will not be resolved.\n"; /* * As we do lazy loading of symtabs we only will know if the * specified vmlinux file is invalid when we actually have a @@ -752,12 +767,20 @@ static void perf_event__process_sample(const union perf_event *event, * --hide-kernel-symbols, even if the user specifies an * invalid --vmlinux ;-) */ - if (al.map == machine->vmlinux_maps[MAP__FUNCTION] && + if (!kptr_restrict_warned && !vmlinux_warned && + al.map == machine->vmlinux_maps[MAP__FUNCTION] && RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { - ui__warning("The %s file can't be used\n", - symbol_conf.vmlinux_name); - exit_browser(0); - exit(1); + if (symbol_conf.vmlinux_name) { + ui__warning("The %s file can't be used.\n%s", + symbol_conf.vmlinux_name, msg); + } else { + ui__warning("A vmlinux file was not found.\n%s", + msg); + } + + if (use_browser <= 0) + sleep(5); + vmlinux_warned = true; } return; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 6635fcd11ca..0fe9adf7637 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -553,9 +553,18 @@ static int perf_event__process_kernel_mmap(union perf_event *event, goto out_problem; perf_event__set_kernel_mmap_len(event, machine->vmlinux_maps); - perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, - symbol_name, - event->mmap.pgoff); + + /* + * Avoid using a zero address (kptr_restrict) for the ref reloc + * symbol. Effectively having zero here means that at record + * time /proc/sys/kernel/kptr_restrict was non zero. 
+ */ + if (event->mmap.pgoff != 0) { + perf_session__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, + symbol_name, + event->mmap.pgoff); + } + if (machine__is_default_guest(machine)) { /* * preload dso of guest kernel and modules diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ee0fe0dffa7..cca29ededb5 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -35,7 +35,17 @@ struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx) int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) { + int cpu, thread; evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); + + if (evsel->fd) { + for (cpu = 0; cpu < ncpus; cpu++) { + for (thread = 0; thread < nthreads; thread++) { + FD(evsel, cpu, thread) = -1; + } + } + } + return evsel->fd != NULL ? 0 : -ENOMEM; } diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 0717bebc764..afb0849fe53 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -193,9 +193,13 @@ int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, *linkname = malloc(size), *targetname; int len, err = -1; - if (is_kallsyms) + if (is_kallsyms) { + if (symbol_conf.kptr_restrict) { + pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); + return 0; + } realname = (char *)name; - else + } else realname = realpath(name, NULL); if (realname == NULL || filename == NULL || linkname == NULL) diff --git a/tools/perf/util/include/linux/const.h b/tools/perf/util/include/linux/const.h new file mode 100644 index 00000000000..1b476c9ae64 --- /dev/null +++ b/tools/perf/util/include/linux/const.h @@ -0,0 +1 @@ +#include "../../../../include/linux/const.h" diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 516876dfbe5..eec196329fd 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -676,9 +676,30 @@ discard_symbol: rb_erase(&pos->rb_node, root); return count + moved; } +static bool 
symbol__restricted_filename(const char *filename, + const char *restricted_filename) +{ + bool restricted = false; + + if (symbol_conf.kptr_restrict) { + char *r = realpath(filename, NULL); + + if (r != NULL) { + restricted = strcmp(r, restricted_filename) == 0; + free(r); + return restricted; + } + } + + return restricted; +} + int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map, symbol_filter_t filter) { + if (symbol__restricted_filename(filename, "/proc/kallsyms")) + return -1; + if (dso__load_all_kallsyms(dso, filename, map) < 0) return -1; @@ -1790,6 +1811,9 @@ static int machine__create_modules(struct machine *machine) modules = path; } + if (symbol__restricted_filename(path, "/proc/modules")) + return -1; + file = fopen(modules, "r"); if (file == NULL) return -1; @@ -2239,6 +2263,9 @@ static u64 machine__get_kernel_start_addr(struct machine *machine) } } + if (symbol__restricted_filename(filename, "/proc/kallsyms")) + return 0; + if (kallsyms__parse(filename, &args, symbol__in_kernel) <= 0) return 0; @@ -2410,6 +2437,25 @@ static int setup_list(struct strlist **list, const char *list_str, return 0; } +static bool symbol__read_kptr_restrict(void) +{ + bool value = false; + + if (geteuid() != 0) { + FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r"); + if (fp != NULL) { + char line[8]; + + if (fgets(line, sizeof(line), fp) != NULL) + value = atoi(line) != 0; + + fclose(fp); + } + } + + return value; +} + int symbol__init(void) { const char *symfs; @@ -2456,6 +2502,8 @@ int symbol__init(void) if (symfs != symbol_conf.symfs) free((void *)symfs); + symbol_conf.kptr_restrict = symbol__read_kptr_restrict(); + symbol_conf.initialized = true; return 0; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 242de0101a8..325ee36a9d2 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -75,7 +75,8 @@ struct symbol_conf { use_callchain, exclude_other, show_cpu_utilization, - initialized; + initialized, + 
kptr_restrict; const char *vmlinux_name, *kallsyms_name, *source_prefix, diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 1fd29b2daa9..cef28e6632b 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -788,7 +788,7 @@ sub wait_for_input sub reboot_to { if ($reboot_type eq "grub") { - run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch; reboot)'"; + run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch && reboot)'"; return; } @@ -1480,7 +1480,7 @@ sub process_config_ignore { or dodie "Failed to read $config"; while (<IN>) { - if (/^(.*?(CONFIG\S*)(=.*| is not set))/) { + if (/^((CONFIG\S*)=.*)/) { $config_ignore{$2} = $1; } } @@ -1638,7 +1638,7 @@ sub run_config_bisect { if (!$found) { # try the other half doprint "Top half produced no set configs, trying bottom half\n"; - @tophalf = @start_list[$half .. $#start_list]; + @tophalf = @start_list[$half + 1 .. $#start_list]; create_config @tophalf; read_current_config \%current_config; foreach my $config (@tophalf) { @@ -1690,7 +1690,7 @@ sub run_config_bisect { # remove half the configs we are looking at and see if # they are good. 
$half = int($#start_list / 2); - } while ($half > 0); + } while ($#start_list > 0); # we found a single config, try it again unless we are running manually diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index df0c6d2c386..74d3331bdaf 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -198,6 +198,14 @@ const struct option longopts[] = { .val = 'h', }, { + .name = "event-idx", + .val = 'E', + }, + { + .name = "no-event-idx", + .val = 'e', + }, + { .name = "indirect", .val = 'I', }, @@ -211,13 +219,17 @@ const struct option longopts[] = { static void help() { - fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n"); + fprintf(stderr, "Usage: virtio_test [--help]" + " [--no-indirect]" + " [--no-event-idx]" + "\n"); } int main(int argc, char **argv) { struct vdev_info dev; - unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC; + unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX); int o; for (;;) { @@ -228,6 +240,9 @@ int main(int argc, char **argv) case '?': help(); exit(2); + case 'e': + features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX); + break; case 'h': help(); goto done; |