diff options
333 files changed, 18648 insertions, 7669 deletions
diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt index d89b4fe724d..a5eb7d19a65 100644 --- a/Documentation/block/cfq-iosched.txt +++ b/Documentation/block/cfq-iosched.txt @@ -102,6 +102,64 @@ processing of request. Therefore, increasing the value can imporve the performace although this can cause the latency of some I/O to increase due to more number of requests. +CFQ Group scheduling +==================== + +CFQ supports blkio cgroup and has "blkio." prefixed files in each +blkio cgroup directory. It is weight-based and there are four knobs +for configuration - weight[_device] and leaf_weight[_device]. +Internal cgroup nodes (the ones with children) can also have tasks in +them, so the former two configure how much proportion the cgroup as a +whole is entitled to at its parent's level while the latter two +configure how much proportion the tasks in the cgroup have compared to +its direct children. + +Another way to think about it is assuming that each internal node has +an implicit leaf child node which hosts all the tasks whose weight is +configured by leaf_weight[_device]. Let's assume a blkio hierarchy +composed of five cgroups - root, A, B, AA and AB - with the following +weights where the names represent the hierarchy. + + weight leaf_weight + root : 125 125 + A : 500 750 + B : 250 500 + AA : 500 500 + AB : 1000 500 + +root never has a parent making its weight is meaningless. For backward +compatibility, weight is always kept in sync with leaf_weight. B, AA +and AB have no child and thus its tasks have no children cgroup to +compete with. They always get 100% of what the cgroup won at the +parent level. Considering only the weights which matter, the hierarchy +looks like the following. + + root + / | \ + A B leaf + 500 250 125 + / | \ + AA AB leaf + 500 1000 750 + +If all cgroups have active IOs and competing with each other, disk +time will be distributed like the following. + +Distribution below root. The total active weight at this level is +A:500 + B:250 + C:125 = 875. + + root-leaf : 125 / 875 =~ 14% + A : 500 / 875 =~ 57% + B(-leaf) : 250 / 875 =~ 28% + +A has children and further distributes its 57% among the children and +the implicit leaf node. The total active weight at this level is +AA:500 + AB:1000 + A-leaf:750 = 2250. + + A-leaf : ( 750 / 2250) * A =~ 19% + AA(-leaf) : ( 500 / 2250) * A =~ 12% + AB(-leaf) : (1000 / 2250) * A =~ 25% + CFQ IOPS Mode for group scheduling =================================== Basic CFQ design is to provide priority based time slices. Higher priority diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt index a794ce91a2d..da272c8f44e 100644 --- a/Documentation/cgroups/blkio-controller.txt +++ b/Documentation/cgroups/blkio-controller.txt @@ -94,13 +94,11 @@ Throttling/Upper Limit policy Hierarchical Cgroups ==================== -- Currently none of the IO control policy supports hierarchical groups. But - cgroup interface does allow creation of hierarchical cgroups and internally - IO policies treat them as flat hierarchy. +- Currently only CFQ supports hierarchical groups. For throttling, + cgroup interface does allow creation of hierarchical cgroups and + internally it treats them as flat hierarchy. - So this patch will allow creation of cgroup hierarchcy but at the backend - everything will be treated as flat. So if somebody created a hierarchy like - as follows. + If somebody created a hierarchy like as follows. root / \ @@ -108,16 +106,20 @@ Hierarchical Cgroups | test3 - CFQ and throttling will practically treat all groups at same level. + CFQ will handle the hierarchy correctly but and throttling will + practically treat all groups at same level. For details on CFQ + hierarchy support, refer to Documentation/block/cfq-iosched.txt. + Throttling will treat the hierarchy as if it looks like the + following. pivot / / \ \ root test1 test2 test3 - Down the line we can implement hierarchical accounting/control support - and also introduce a new cgroup file "use_hierarchy" which will control - whether cgroup hierarchy is viewed as flat or hierarchical by the policy.. - This is how memory controller also has implemented the things. + Nesting cgroups, while allowed, isn't officially supported and blkio + genereates warning when cgroups nest. Once throttling implements + hierarchy support, hierarchy will be supported and the warning will + be removed. Various user visible config options =================================== @@ -172,6 +174,12 @@ Proportional weight policy files dev weight 8:16 300 +- blkio.leaf_weight[_device] + - Equivalents of blkio.weight[_device] for the purpose of + deciding how much weight tasks in the given cgroup has while + competing with the cgroup's child cgroups. For details, + please refer to Documentation/block/cfq-iosched.txt. + - blkio.time - disk time allocated to cgroup per device in milliseconds. First two fields specify the major and minor number of the device and @@ -279,6 +287,11 @@ Proportional weight policy files and minor number of the device and third field specifies the number of times a group was dequeued from a particular device. +- blkio.*_recursive + - Recursive version of various stats. These files show the + same information as their non-recursive counterparts but + include stats from all the descendant cgroups. + Throttling/Upper limit policy files ----------------------------------- - blkio.throttle.read_bps_device diff --git a/Documentation/devicetree/bindings/arm/armadeus.txt b/Documentation/devicetree/bindings/arm/armadeus.txt new file mode 100644 index 00000000000..9821283ff51 --- /dev/null +++ b/Documentation/devicetree/bindings/arm/armadeus.txt @@ -0,0 +1,6 @@ +Armadeus i.MX Platforms Device Tree Bindings +----------------------------------------------- + +APF51: i.MX51 based module. +Required root node properties: + - compatible = "armadeus,imx51-apf51", "fsl,imx51"; diff --git a/Documentation/devicetree/bindings/arm/fsl.txt b/Documentation/devicetree/bindings/arm/fsl.txt index f79818711e8..e935d7d4ac4 100644 --- a/Documentation/devicetree/bindings/arm/fsl.txt +++ b/Documentation/devicetree/bindings/arm/fsl.txt @@ -5,6 +5,14 @@ i.MX23 Evaluation Kit Required root node properties: - compatible = "fsl,imx23-evk", "fsl,imx23"; +i.MX25 Product Development Kit +Required root node properties: + - compatible = "fsl,imx25-pdk", "fsl,imx25"; + +i.MX27 Product Development Kit +Required root node properties: + - compatible = "fsl,imx27-pdk", "fsl,imx27"; + i.MX28 Evaluation Kit Required root node properties: - compatible = "fsl,imx28-evk", "fsl,imx28"; diff --git a/Documentation/devicetree/bindings/clock/imx5-clock.txt b/Documentation/devicetree/bindings/clock/imx5-clock.txt index 04ad47876be..2a0c904c46a 100644 --- a/Documentation/devicetree/bindings/clock/imx5-clock.txt +++ b/Documentation/devicetree/bindings/clock/imx5-clock.txt @@ -171,6 +171,7 @@ clocks and IDs. can_sel 156 can1_serial_gate 157 can1_ipg_gate 158 + owire_gate 159 Examples (for mx53): diff --git a/Documentation/devicetree/bindings/clock/imx6q-clock.txt b/Documentation/devicetree/bindings/clock/imx6q-clock.txt index f73fdf59556..969b38e06ad 100644 --- a/Documentation/devicetree/bindings/clock/imx6q-clock.txt +++ b/Documentation/devicetree/bindings/clock/imx6q-clock.txt @@ -203,6 +203,8 @@ clocks and IDs. pcie_ref 188 pcie_ref_125m 189 enet_ref 190 + usbphy1_gate 191 + usbphy2_gate 192 Examples: diff --git a/Documentation/devicetree/bindings/thermal/dove-thermal.txt b/Documentation/devicetree/bindings/thermal/dove-thermal.txt new file mode 100644 index 00000000000..6f474677d47 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/dove-thermal.txt @@ -0,0 +1,18 @@ +* Dove Thermal + +This driver is for Dove SoCs which contain a thermal sensor. + +Required properties: +- compatible : "marvell,dove-thermal" +- reg : Address range of the thermal registers + +The reg properties should contain two ranges. The first is for the +three Thermal Manager registers, while the second range contains the +Thermal Diode Control Registers. + +Example: + + thermal@10078 { + compatible = "marvell,dove-thermal"; + reg = <0xd001c 0x0c>, <0xd005c 0x08>; + }; diff --git a/Documentation/devicetree/bindings/thermal/kirkwood-thermal.txt b/Documentation/devicetree/bindings/thermal/kirkwood-thermal.txt new file mode 100644 index 00000000000..8c0f5eb86da --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/kirkwood-thermal.txt @@ -0,0 +1,15 @@ +* Kirkwood Thermal + +This version is for Kirkwood 88F8262 & 88F6283 SoCs. Other kirkwoods +don't contain a thermal sensor. + +Required properties: +- compatible : "marvell,kirkwood-thermal" +- reg : Address range of the thermal registers + +Example: + + thermal@10078 { + compatible = "marvell,kirkwood-thermal"; + reg = <0x10078 0x4>; + }; diff --git a/Documentation/devicetree/bindings/thermal/rcar-thermal.txt b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt new file mode 100644 index 00000000000..28ef498a66e --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/rcar-thermal.txt @@ -0,0 +1,29 @@ +* Renesas R-Car Thermal + +Required properties: +- compatible : "renesas,rcar-thermal" +- reg : Address range of the thermal registers. + The 1st reg will be recognized as common register + if it has "interrupts". + +Option properties: + +- interrupts : use interrupt + +Example (non interrupt support): + +thermal@e61f0100 { + compatible = "renesas,rcar-thermal"; + reg = <0xe61f0100 0x38>; +}; + +Example (interrupt support): + +thermal@e61f0000 { + compatible = "renesas,rcar-thermal"; + reg = <0xe61f0000 0x14 + 0xe61f0100 0x38 + 0xe61f0200 0x38 + 0xe61f0300 0x38>; + interrupts = <0 69 4>; +}; diff --git a/Documentation/devicetree/bindings/arm/armada-370-xp-timer.txt b/Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt index 64830118b01..36381129d14 100644 --- a/Documentation/devicetree/bindings/arm/armada-370-xp-timer.txt +++ b/Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt @@ -1,10 +1,13 @@ -Marvell Armada 370 and Armada XP Global Timers ----------------------------------------------- +Marvell Armada 370 and Armada XP Timers +--------------------------------------- Required properties: - compatible: Should be "marvell,armada-370-xp-timer" -- interrupts: Should contain the list of Global Timer interrupts -- reg: Should contain the base address of the Global Timer registers +- interrupts: Should contain the list of Global Timer interrupts and + then local timer interrupts +- reg: Should contain location and length for timers register. First + pair for the Global Timer registers, second pair for the + local/private timers. - clocks: clock driving the timer hardware Optional properties: diff --git a/Documentation/devicetree/bindings/w1/fsl-imx-owire.txt b/Documentation/devicetree/bindings/w1/fsl-imx-owire.txt new file mode 100644 index 00000000000..ecf42c07684 --- /dev/null +++ b/Documentation/devicetree/bindings/w1/fsl-imx-owire.txt @@ -0,0 +1,19 @@ +* Freescale i.MX One wire bus master controller + +Required properties: +- compatible : should be "fsl,imx21-owire" +- reg : Address and length of the register set for the device + +Optional properties: +- clocks : phandle of clock that supplies the module (required if platform + clock bindings use device tree) + +Example: + +- From imx53.dtsi: +owire: owire@63fa4000 { + compatible = "fsl,imx53-owire", "fsl,imx21-owire"; + reg = <0x63fa4000 0x4000>; + clocks = <&clks 159>; + status = "disabled"; +}; diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt index 0188903bc9e..4966b1be42a 100644 --- a/Documentation/dma-buf-sharing.txt +++ b/Documentation/dma-buf-sharing.txt @@ -302,7 +302,11 @@ Access to a dma_buf from the kernel context involves three steps: void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr) The vmap call can fail if there is no vmap support in the exporter, or if it - runs out of vmalloc space. Fallback to kmap should be implemented. + runs out of vmalloc space. Fallback to kmap should be implemented. Note that + the dma-buf layer keeps a reference count for all vmap access and calls down + into the exporter's vmap function only when no vmapping exists, and only + unmaps it once. Protection against concurrent vmap/vunmap calls is provided + by taking the dma_buf->lock mutex. 3. Finish access diff --git a/Documentation/thermal/exynos_thermal_emulation b/Documentation/thermal/exynos_thermal_emulation new file mode 100644 index 00000000000..b73bbfb697b --- /dev/null +++ b/Documentation/thermal/exynos_thermal_emulation @@ -0,0 +1,53 @@ +EXYNOS EMULATION MODE +======================== + +Copyright (C) 2012 Samsung Electronics + +Written by Jonghwa Lee <jonghwa3.lee@samsung.com> + +Description +----------- + +Exynos 4x12 (4212, 4412) and 5 series provide emulation mode for thermal management unit. +Thermal emulation mode supports software debug for TMU's operation. User can set temperature +manually with software code and TMU will read current temperature from user value not from +sensor's value. + +Enabling CONFIG_EXYNOS_THERMAL_EMUL option will make this support in available. +When it's enabled, sysfs node will be created under +/sys/bus/platform/devices/'exynos device name'/ with name of 'emulation'. + +The sysfs node, 'emulation', will contain value 0 for the initial state. When you input any +temperature you want to update to sysfs node, it automatically enable emulation mode and +current temperature will be changed into it. +(Exynos also supports user changable delay time which would be used to delay of + changing temperature. However, this node only uses same delay of real sensing time, 938us.) + +Exynos emulation mode requires synchronous of value changing and enabling. It means when you +want to update the any value of delay or next temperature, then you have to enable emulation +mode at the same time. (Or you have to keep the mode enabling.) If you don't, it fails to +change the value to updated one and just use last succeessful value repeatedly. That's why +this node gives users the right to change termerpature only. Just one interface makes it more +simply to use. + +Disabling emulation mode only requires writing value 0 to sysfs node. + + +TEMP 120 | + | + 100 | + | + 80 | + | +----------- + 60 | | | + | +-------------| | + 40 | | | | + | | | | + 20 | | | +---------- + | | | | | + 0 |______________|_____________|__________|__________|_________ + A A A A TIME + |<----->| |<----->| |<----->| | + | 938us | | | | | | +emulation : 0 50 | 70 | 20 | 0 +current temp : sensor 50 70 20 sensor diff --git a/Documentation/thermal/intel_powerclamp.txt b/Documentation/thermal/intel_powerclamp.txt new file mode 100644 index 00000000000..332de4a39b5 --- /dev/null +++ b/Documentation/thermal/intel_powerclamp.txt @@ -0,0 +1,307 @@ + ======================= + INTEL POWERCLAMP DRIVER + ======================= +By: Arjan van de Ven <arjan@linux.intel.com> + Jacob Pan <jacob.jun.pan@linux.intel.com> + +Contents: + (*) Introduction + - Goals and Objectives + + (*) Theory of Operation + - Idle Injection + - Calibration + + (*) Performance Analysis + - Effectiveness and Limitations + - Power vs Performance + - Scalability + - Calibration + - Comparison with Alternative Techniques + + (*) Usage and Interfaces + - Generic Thermal Layer (sysfs) + - Kernel APIs (TBD) + +============ +INTRODUCTION +============ + +Consider the situation where a system’s power consumption must be +reduced at runtime, due to power budget, thermal constraint, or noise +level, and where active cooling is not preferred. Software managed +passive power reduction must be performed to prevent the hardware +actions that are designed for catastrophic scenarios. + +Currently, P-states, T-states (clock modulation), and CPU offlining +are used for CPU throttling. + +On Intel CPUs, C-states provide effective power reduction, but so far +they’re only used opportunistically, based on workload. With the +development of intel_powerclamp driver, the method of synchronizing +idle injection across all online CPU threads was introduced. The goal +is to achieve forced and controllable C-state residency. + +Test/Analysis has been made in the areas of power, performance, +scalability, and user experience. In many cases, clear advantage is +shown over taking the CPU offline or modulating the CPU clock. + + +=================== +THEORY OF OPERATION +=================== + +Idle Injection +-------------- + +On modern Intel processors (Nehalem or later), package level C-state +residency is available in MSRs, thus also available to the kernel. + +These MSRs are: + #define MSR_PKG_C2_RESIDENCY 0x60D + #define MSR_PKG_C3_RESIDENCY 0x3F8 + #define MSR_PKG_C6_RESIDENCY 0x3F9 + #define MSR_PKG_C7_RESIDENCY 0x3FA + +If the kernel can also inject idle time to the system, then a +closed-loop control system can be established that manages package +level C-state. The intel_powerclamp driver is conceived as such a +control system, where the target set point is a user-selected idle +ratio (based on power reduction), and the error is the difference +between the actual package level C-state residency ratio and the target idle +ratio. + +Injection is controlled by high priority kernel threads, spawned for +each online CPU. + +These kernel threads, with SCHED_FIFO class, are created to perform +clamping actions of controlled duty ratio and duration. Each per-CPU +thread synchronizes its idle time and duration, based on the rounding +of jiffies, so accumulated errors can be prevented to avoid a jittery +effect. Threads are also bound to the CPU such that they cannot be +migrated, unless the CPU is taken offline. In this case, threads +belong to the offlined CPUs will be terminated immediately. + +Running as SCHED_FIFO and relatively high priority, also allows such +scheme to work for both preemptable and non-preemptable kernels. +Alignment of idle time around jiffies ensures scalability for HZ +values. This effect can be better visualized using a Perf timechart. +The following diagram shows the behavior of kernel thread +kidle_inject/cpu. During idle injection, it runs monitor/mwait idle +for a given "duration", then relinquishes the CPU to other tasks, +until the next time interval. + +The NOHZ schedule tick is disabled during idle time, but interrupts +are not masked. Tests show that the extra wakeups from scheduler tick +have a dramatic impact on the effectiveness of the powerclamp driver +on large scale systems (Westmere system with 80 processors). + +CPU0 + ____________ ____________ +kidle_inject/0 | sleep | mwait | sleep | + _________| |________| |_______ + duration +CPU1 + ____________ ____________ +kidle_inject/1 | sleep | mwait | sleep | + _________| |________| |_______ + ^ + | + | + roundup(jiffies, interval) + +Only one CPU is allowed to collect statistics and update global +control parameters. This CPU is referred to as the controlling CPU in +this document. The controlling CPU is elected at runtime, with a +policy that favors BSP, taking into account the possibility of a CPU +hot-plug. + +In terms of dynamics of the idle control system, package level idle +time is considered largely as a non-causal system where its behavior +cannot be based on the past or current input. Therefore, the +intel_powerclamp driver attempts to enforce the desired idle time +instantly as given input (target idle ratio). After injection, +powerclamp moniors the actual idle for a given time window and adjust +the next injection accordingly to avoid over/under correction. + +When used in a causal control system, such as a temperature control, +it is up to the user of this driver to implement algorithms where +past samples and outputs are included in the feedback. For example, a +PID-based thermal controller can use the powerclamp driver to +maintain a desired target temperature, based on integral and +derivative gains of the past samples. + + + +Calibration +----------- +During scalability testing, it is observed that synchronized actions +among CPUs become challenging as the number of cores grows. This is +also true for the ability of a system to enter package level C-states. + +To make sure the intel_powerclamp driver scales well, online +calibration is implemented. The goals for doing such a calibration +are: + +a) determine the effective range of idle injection ratio +b) determine the amount of compensation needed at each target ratio + +Compensation to each target ratio consists of two parts: + + a) steady state error compensation + This is to offset the error occurring when the system can + enter idle without extra wakeups (such as external interrupts). + + b) dynamic error compensation + When an excessive amount of wakeups occurs during idle, an + additional idle ratio can be added to quiet interrupts, by + slowing down CPU activities. + +A debugfs file is provided for the user to examine compensation +progress and results, such as on a Westmere system. +[jacob@nex01 ~]$ cat +/sys/kernel/debug/intel_powerclamp/powerclamp_calib +controlling cpu: 0 +pct confidence steady dynamic (compensation) +0 0 0 0 +1 1 0 0 +2 1 1 0 +3 3 1 0 +4 3 1 0 +5 3 1 0 +6 3 1 0 +7 3 1 0 +8 3 1 0 +... +30 3 2 0 +31 3 2 0 +32 3 1 0 +33 3 2 0 +34 3 1 0 +35 3 2 0 +36 3 1 0 +37 3 2 0 +38 3 1 0 +39 3 2 0 +40 3 3 0 +41 3 1 0 +42 3 2 0 +43 3 1 0 +44 3 1 0 +45 3 2 0 +46 3 3 0 +47 3 0 0 +48 3 2 0 +49 3 3 0 + +Calibration occurs during runtime. No offline method is available. +Steady state compensation is used only when confidence levels of all +adjacent ratios have reached satisfactory level. A confidence level +is accumulated based on clean data collected at runtime. Data +collected during a period without extra interrupts is considered +clean. + +To compensate for excessive amounts of wakeup during idle, additional +idle time is injected when such a condition is detected. Currently, +we have a simple algorithm to double the injection ratio. A possible +enhancement might be to throttle the offending IRQ, such as delaying +EOI for level triggered interrupts. But it is a challenge to be +non-intrusive to the scheduler or the IRQ core code. + + +CPU Online/Offline +------------------ +Per-CPU kernel threads are started/stopped upon receiving +notifications of CPU hotplug activities. The intel_powerclamp driver +keeps track of clamping kernel threads, even after they are migrated +to other CPUs, after a CPU offline event. + + +===================== +Performance Analysis +===================== +This section describes the general performance data collected on +multiple systems, including Westmere (80P) and Ivy Bridge (4P, 8P). + +Effectiveness and Limitations +----------------------------- +The maximum range that idle injection is allowed is capped at 50 +percent. As mentioned earlier, since interrupts are allowed during +forced idle time, excessive interrupts could result in less +effectiveness. The extreme case would be doing a ping -f to generated +flooded network interrupts without much CPU acknowledgement. In this +case, little can be done from the idle injection threads. In most +normal cases, such as scp a large file, applications can be throttled +by the powerclamp driver, since slowing down the CPU also slows down +network protocol processing, which in turn reduces interrupts. + +When control parameters change at runtime by the controlling CPU, it +may take an additional period for the rest of the CPUs to catch up +with the changes. During this time, idle injection is out of sync, +thus not able to enter package C- states at the expected ratio. But +this effect is minor, in that in most cases change to the target +ratio is updated much less frequently than the idle injection +frequency. + +Scalability +----------- +Tests also show a minor, but measurable, difference between the 4P/8P +Ivy Bridge system and the 80P Westmere server under 50% idle ratio. +More compensation is needed on Westmere for the same amount of +target idle ratio. The compensation also increases as the idle ratio +gets larger. The above reason constitutes the need for the +calibration code. + +On the IVB 8P system, compared to an offline CPU, powerclamp can +achieve up to 40% better performance per watt. (measured by a spin +counter summed over per CPU counting threads spawned for all running +CPUs). + +==================== +Usage and Interfaces +==================== +The powerclamp driver is registered to the generic thermal layer as a +cooling device. Currently, it’s not bound to any thermal zones. + +jacob@chromoly:/sys/class/thermal/cooling_device14$ grep . * +cur_state:0 +max_state:50 +type:intel_powerclamp + +Example usage: +- To inject 25% idle time +$ sudo sh -c "echo 25 > /sys/class/thermal/cooling_device80/cur_state +" + +If the system is not busy and has more than 25% idle time already, +then the powerclamp driver will not start idle injection. Using Top +will not show idle injection kernel threads. + +If the system is busy (spin test below) and has less than 25% natural +idle time, powerclamp kernel threads will do idle injection, which +appear running to the scheduler. But the overall system idle is still +reflected. In this example, 24.1% idle is shown. This helps the +system admin or user determine the cause of slowdown, when a +powerclamp driver is in action. + + +Tasks: 197 total, 1 running, 196 sleeping, 0 stopped, 0 zombie +Cpu(s): 71.2%us, 4.7%sy, 0.0%ni, 24.1%id, 0.0%wa, 0.0%hi, 0.0%si, 0.0%st +Mem: 3943228k total, 1689632k used, 2253596k free, 74960k buffers +Swap: 4087804k total, 0k used, 4087804k free, 945336k cached + + PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND + 3352 jacob 20 0 262m 644 428 S 286 0.0 0:17.16 spin + 3341 root -51 0 0 0 0 D 25 0.0 0:01.62 kidle_inject/0 + 3344 root -51 0 0 0 0 D 25 0.0 0:01.60 kidle_inject/3 + 3342 root -51 0 0 0 0 D 25 0.0 0:01.61 kidle_inject/1 + 3343 root -51 0 0 0 0 D 25 0.0 0:01.60 kidle_inject/2 + 2935 jacob 20 0 696m 125m 35m S 5 3.3 0:31.11 firefox + 1546 root 20 0 158m 20m 6640 S 3 0.5 0:26.97 Xorg + 2100 jacob 20 0 1223m 88m 30m S 3 2.3 0:23.68 compiz + +Tests have shown that by using the powerclamp driver as a cooling +device, a PID based userspace thermal controller can manage to +control CPU temperature effectively, when no other thermal influence +is added. For example, a UltraBook user can compile the kernel under +certain temperature (below most active trip points). diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index 88c02334e35..6859661c9d3 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt @@ -55,6 +55,8 @@ temperature) and throttle appropriate devices. .get_trip_type: get the type of certain trip point. .get_trip_temp: get the temperature above which the certain trip point will be fired. + .set_emul_temp: set the emulation temperature which helps in debugging + different threshold temperature points. 1.1.2 void thermal_zone_device_unregister(struct thermal_zone_device *tz) @@ -153,6 +155,7 @@ Thermal zone device sys I/F, created once it's registered: |---trip_point_[0-*]_temp: Trip point temperature |---trip_point_[0-*]_type: Trip point type |---trip_point_[0-*]_hyst: Hysteresis value for this trip point + |---emul_temp: Emulated temperature set node Thermal cooling device sys I/F, created once it's registered: /sys/class/thermal/cooling_device[0-*]: @@ -252,6 +255,16 @@ passive Valid values: 0 (disabled) or greater than 1000 RW, Optional +emul_temp + Interface to set the emulated temperature method in thermal zone + (sensor). After setting this temperature, the thermal zone may pass + this temperature to platform emulation function if registered or + cache it locally. This is useful in debugging different temperature + threshold and its associated cooling action. This is write only node + and writing 0 on this node should disable emulation. + Unit: millidegree Celsius + WO, Optional + ***************************** * Cooling device attributes * ***************************** @@ -329,8 +342,9 @@ The framework includes a simple notification mechanism, in the form of a netlink event. Netlink socket initialization is done during the _init_ of the framework. Drivers which intend to use the notification mechanism just need to call thermal_generate_netlink_event() with two arguments viz -(originator, event). Typically the originator will be an integer assigned -to a thermal_zone_device when it registers itself with the framework. The +(originator, event). The originator is a pointer to struct thermal_zone_device +from where the event has been originated. An integer which represents the +thermal zone device will be used in the message to identify the zone. The event will be one of:{THERMAL_AUX0, THERMAL_AUX1, THERMAL_CRITICAL, THERMAL_DEV_FAULT}. Notification can be sent when the current temperature crosses any of the configured thresholds. diff --git a/MAINTAINERS b/MAINTAINERS index 0b4bb157a48..6db1c6bdf01 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1800,7 +1800,8 @@ F: drivers/bcma/ F: include/linux/bcma/ BROCADE BFA FC SCSI DRIVER -M: Krishna C Gudipati <kgudipat@brocade.com> +M: Anil Gurumurthy <agurumur@brocade.com> +M: Vijaya Mohan Guvva <vmohan@brocade.com> L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/bfa/ @@ -2074,8 +2075,8 @@ S: Maintained F: include/linux/clk.h CISCO FCOE HBA DRIVER -M: Abhijeet Joglekar <abjoglek@cisco.com> -M: Venkata Siva Vijayendra Bhamidipati <vbhamidi@cisco.com> +M: Hiral Patel <hiralpat@cisco.com> +M: Suma Ramars <sramars@cisco.com> M: Brian Uchino <buchino@cisco.com> L: linux-scsi@vger.kernel.org S: Supported @@ -2903,6 +2904,13 @@ W: bluesmoke.sourceforge.net S: Maintained F: drivers/edac/e7xxx_edac.c +EDAC-GHES +M: Mauro Carvalho Chehab <mchehab@redhat.com> +L: linux-edac@vger.kernel.org +W: bluesmoke.sourceforge.net +S: Maintained +F: drivers/edac/ghes-edac.c + EDAC-I82443BXGX M: Tim Small <tim@buttersideup.com> L: linux-edac@vger.kernel.org @@ -6514,6 +6522,12 @@ S: Maintained F: Documentation/blockdev/ramdisk.txt F: drivers/block/brd.c +RAMSAM DRIVER (IBM RamSan 70/80 PCI SSD Flash Card) +M: Joshua Morris <josh.h.morris@us.ibm.com> +M: Philip Kelleher <pjk1939@linux.vnet.ibm.com> +S: Maintained +F: drivers/block/rsxx/ + RANDOM NUMBER DRIVER M: Theodore Ts'o" <tytso@mit.edu> S: Maintained diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 0e16cca1d01..5b714695b01 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1676,7 +1676,6 @@ config HZ int default 200 if ARCH_EBSA110 || ARCH_S3C24XX || ARCH_S5P64X0 || \ ARCH_S5PV210 || ARCH_EXYNOS4 - default OMAP_32K_TIMER_HZ if ARCH_OMAP && OMAP_32K_TIMER default AT91_TIMER_HZ if ARCH_AT91 default SHMOBILE_TIMER_HZ if ARCH_SHMOBILE default 100 diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 411ab1614a0..9c6255884cb 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -56,6 +56,7 @@ dtb-$(CONFIG_ARCH_KIRKWOOD) += kirkwood-dns320.dtb \ kirkwood-dockstar.dtb \ kirkwood-dreamplug.dtb \ kirkwood-goflexnet.dtb \ + kirkwood-guruplug-server-plus.dtb \ kirkwood-ib62x0.dtb \ kirkwood-iconnect.dtb \ kirkwood-iomega_ix2_200.dtb \ @@ -78,11 +79,21 @@ dtb-$(CONFIG_ARCH_MSM) += msm8660-surf.dtb \ msm8960-cdp.dtb dtb-$(CONFIG_ARCH_MVEBU) += armada-370-db.dtb \ armada-370-mirabox.dtb \ + armada-370-rd.dtb \ armada-xp-db.dtb \ + armada-xp-gp.dtb \ armada-xp-openblocks-ax3-4.dtb -dtb-$(CONFIG_ARCH_MXC) += imx51-babbage.dtb \ +dtb-$(CONFIG_ARCH_MXC) += \ + imx25-karo-tx25.dtb \ + imx25-pdk.dtb \ + imx27-apf27.dtb \ + imx27-pdk.dtb \ + imx31-bug.dtb \ + imx51-apf51.dtb \ + imx51-babbage.dtb \ imx53-ard.dtb \ imx53-evk.dtb \ + imx53-mba53.dtb \ imx53-qsb.dtb \ imx53-smd.dtb \ imx6q-arm2.dtb \ diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index c2f14e875eb..0957645b73a 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -385,5 +385,19 @@ mac-address = [ 00 00 00 00 00 00 ]; }; }; + + ocmcram: ocmcram@40300000 { + compatible = "ti,am3352-ocmcram"; + reg = <0x40300000 0x10000>; + ti,hwmods = "ocmcram"; + ti,no_idle_on_suspend; + }; + + wkup_m3: wkup_m3@44d00000 { + compatible = "ti,am3353-wkup-m3"; + reg = <0x44d00000 0x4000 /* M3 UMEM */ + 0x44d80000 0x2000>; /* M3 DMEM */ + ti,hwmods = "wkup_m3"; + }; }; }; diff --git a/arch/arm/boot/dts/armada-370-db.dts b/arch/arm/boot/dts/armada-370-db.dts index 9b82facb256..e34b280ce6e 100644 --- a/arch/arm/boot/dts/armada-370-db.dts +++ b/arch/arm/boot/dts/armada-370-db.dts @@ -59,5 +59,40 @@ phy = <&phy1>; phy-mode = "rgmii-id"; }; + + mvsdio@d00d4000 { + pinctrl-0 = <&sdio_pins1>; + pinctrl-names = "default"; + /* + * This device is disabled by default, because + * using the SD card connector requires + * changing the default CON40 connector + * "DB-88F6710_MPP_2xRGMII_DEVICE_Jumper" to a + * different connector + * "DB-88F6710_MPP_RGMII_SD_Jumper". + */ + status = "disabled"; + /* No CD or WP GPIOs */ + }; + + usb@d0050000 { + status = "okay"; + }; + + usb@d0051000 { + status = "okay"; + }; + + spi0: spi@d0010600 { + status = "okay"; + + spi-flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "mx25l25635e"; + reg = <0>; /* Chip select 0 */ + spi-max-frequency = <50000000>; + }; + }; }; }; diff --git a/arch/arm/boot/dts/armada-370-mirabox.dts b/arch/arm/boot/dts/armada-370-mirabox.dts index 3b407133659..dd0c57dd9f3 100644 --- a/arch/arm/boot/dts/armada-370-mirabox.dts +++ b/arch/arm/boot/dts/armada-370-mirabox.dts @@ -52,5 +52,23 @@ phy = <&phy1>; phy-mode = "rgmii-id"; }; + + mvsdio@d00d4000 { + pinctrl-0 = <&sdio_pins2>; + pinctrl-names = "default"; + status = "okay"; + /* + * No CD or WP GPIOs: SDIO interface used for + * Wifi/Bluetooth chip + */ + }; + + usb@d0050000 { + status = "okay"; + }; + + usb@d0051000 { + status = "okay"; + }; }; }; diff --git a/arch/arm/boot/dts/armada-370-rd.dts b/arch/arm/boot/dts/armada-370-rd.dts new file mode 100644 index 00000000000..f8e4855bc9a --- /dev/null +++ b/arch/arm/boot/dts/armada-370-rd.dts @@ -0,0 +1,68 @@ +/* + * Device Tree file for Marvell Armada 370 Reference Design board + * (RD-88F6710-A1) + * + * Copied from arch/arm/boot/dts/armada-370-db.dts + * + * Copyright (C) 2013 Florian Fainelli <florian@openwrt.org> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +/dts-v1/; +/include/ "armada-370.dtsi" + +/ { + model = "Marvell Armada 370 Reference Design"; + compatible = "marvell,a370-rd", "marvell,armada370", "marvell,armada-370-xp"; + + chosen { + bootargs = "console=ttyS0,115200 earlyprintk"; + }; + + memory { + device_type = "memory"; + reg = <0x00000000 0x20000000>; /* 512 MB */ + }; + + soc { + serial@d0012000 { + clock-frequency = <200000000>; + status = "okay"; + }; + sata@d00a0000 { + nr-ports = <2>; + status = "okay"; + }; + + mdio { + phy0: ethernet-phy@0 { + reg = <0>; + }; + + phy1: ethernet-phy@1 { + reg = <1>; + }; + }; + + ethernet@d0070000 { + status = "okay"; + phy = <&phy0>; + phy-mode = "sgmii"; + }; + ethernet@d0074000 { + status = "okay"; + phy = <&phy1>; + phy-mode = "rgmii-id"; + }; + + mvsdio@d00d4000 { + pinctrl-0 = <&sdio_pins1>; + pinctrl-names = "default"; + status = "okay"; + /* No CD or WP GPIOs */ + }; + }; +}; diff --git a/arch/arm/boot/dts/armada-370-xp.dtsi b/arch/arm/boot/dts/armada-370-xp.dtsi index 5b2922599f0..6f1acc75e15 100644 --- a/arch/arm/boot/dts/armada-370-xp.dtsi +++ b/arch/arm/boot/dts/armada-370-xp.dtsi @@ -68,8 +68,9 @@ timer@d0020300 { compatible = "marvell,armada-370-xp-timer"; - reg = <0xd0020300 0x30>; - interrupts = <37>, <38>, <39>, <40>; + reg = <0xd0020300 0x30>, + <0xd0021040 0x30>; + interrupts = <37>, <38>, <39>, <40>, <5>, <6>; clocks = <&coreclk 2>; }; @@ -137,6 +138,50 @@ reg = <0xd0010300 0x20>; interrupts = <50>; }; + + mvsdio@d00d4000 { + compatible = "marvell,orion-sdio"; + reg = <0xd00d4000 0x200>; + interrupts = <54>; + clocks = <&gateclk 17>; + status = "disabled"; + }; + + usb@d0050000 { + compatible = "marvell,orion-ehci"; + reg = <0xd0050000 0x500>; + interrupts = <45>; + status = "disabled"; + }; + + usb@d0051000 { + compatible = "marvell,orion-ehci"; + reg = <0xd0051000 0x500>; + interrupts = <46>; + status = "disabled"; + }; + + spi0: spi@d0010600 { + compatible = "marvell,orion-spi"; + reg = <0xd0010600 0x28>; + #address-cells = <1>; + #size-cells = <0>; + cell-index = <0>; + interrupts = <30>; + clocks = <&coreclk 0>; + status = "disabled"; + }; + + spi1: spi@d0010680 { + compatible = "marvell,orion-spi"; + reg = <0xd0010680 0x28>; + #address-cells = <1>; + #size-cells = <0>; + cell-index = <1>; + interrupts = <92>; + clocks = <&coreclk 0>; + status = "disabled"; + }; }; }; diff --git a/arch/arm/boot/dts/armada-370.dtsi b/arch/arm/boot/dts/armada-370.dtsi index 636cf7d4009..8188d138020 100644 --- a/arch/arm/boot/dts/armada-370.dtsi +++ b/arch/arm/boot/dts/armada-370.dtsi @@ -47,6 +47,18 @@ pinctrl { compatible = "marvell,mv88f6710-pinctrl"; reg = <0xd0018000 0x38>; + + sdio_pins1: sdio-pins1 { + marvell,pins = "mpp9", "mpp11", "mpp12", + "mpp13", "mpp14", "mpp15"; + marvell,function = "sd0"; + }; + + sdio_pins2: sdio-pins2 { + marvell,pins = "mpp47", "mpp48", "mpp49", + "mpp50", "mpp51", "mpp52"; + marvell,function = "sd0"; + }; }; gpio0: gpio@d0018100 { @@ -132,5 +144,14 @@ dmacap,memset; }; }; + + usb@d0050000 { + clocks = <&coreclk 0>; + }; + + usb@d0051000 { + clocks = <&coreclk 0>; + }; + }; }; diff --git a/arch/arm/boot/dts/armada-xp-db.dts b/arch/arm/boot/dts/armada-xp-db.dts index 8e53b25b550..e83505e4c23 100644 --- a/arch/arm/boot/dts/armada-xp-db.dts +++ b/arch/arm/boot/dts/armada-xp-db.dts @@ -90,5 +90,36 @@ phy = <&phy3>; phy-mode = "sgmii"; }; + + mvsdio@d00d4000 { + pinctrl-0 = <&sdio_pins>; + pinctrl-names = "default"; + status = "okay"; + /* No CD or WP GPIOs */ + }; + + usb@d0050000 { + status = "okay"; + }; + + usb@d0051000 { + status = "okay"; + }; + + usb@d0052000 { + status = "okay"; + }; + + spi0: spi@d0010600 { + status = "okay"; + + spi-flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "m25p64"; + reg = <0>; /* Chip select 0 */ + spi-max-frequency = <20000000>; + }; + }; }; }; diff --git a/arch/arm/boot/dts/armada-xp-gp.dts b/arch/arm/boot/dts/armada-xp-gp.dts new file mode 100644 index 00000000000..1c8afe2ffeb --- /dev/null +++ b/arch/arm/boot/dts/armada-xp-gp.dts @@ -0,0 +1,113 @@ +/* + * Device Tree file for Marvell Armada XP development board + * (DB-MV784MP-GP) + * + * Copyright (C) 2013 Marvell + * + * Lior Amsalem <alior@marvell.com> + * Gregory CLEMENT <gregory.clement@free-electrons.com> + * Thomas Petazzoni <thomas.petazzoni@free-electrons.com> + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +/dts-v1/; +/include/ "armada-xp-mv78460.dtsi" + +/ { + model = "Marvell Armada XP Development Board DB-MV784MP-GP"; + compatible = "marvell,axp-gp", "marvell,armadaxp-mv78460", "marvell,armadaxp", "marvell,armada-370-xp"; + + chosen { + bootargs = "console=ttyS0,115200 earlyprintk"; + }; + + memory { + device_type = "memory"; + + /* + * 4 GB of plug-in RAM modules by default but only 3GB + * are visible, the amount of memory available can be + * changed by the bootloader according the size of the + * module actually plugged + */ + reg = <0x00000000 0xC0000000>; + }; + + soc { + serial@d0012000 { + clock-frequency = <250000000>; + status = "okay"; + }; + serial@d0012100 { + clock-frequency = <250000000>; + status = "okay"; + }; + serial@d0012200 { + clock-frequency = <250000000>; + status = "okay"; + }; + serial@d0012300 { + clock-frequency = <250000000>; + status = "okay"; + }; + + sata@d00a0000 { + nr-ports = <2>; + status = "okay"; + }; + + mdio { + phy0: ethernet-phy@0 { + reg = <16>; + }; + + phy1: ethernet-phy@1 { + reg = <17>; + }; + + phy2: ethernet-phy@2 { + reg = <18>; + }; + + phy3: ethernet-phy@3 { + reg = <19>; + }; + }; + + ethernet@d0070000 { + status = "okay"; + phy = <&phy0>; + phy-mode = "rgmii-id"; + }; + ethernet@d0074000 { + status = "okay"; + phy = <&phy1>; + phy-mode = "rgmii-id"; + }; + ethernet@d0030000 { + status = "okay"; + phy = <&phy2>; + phy-mode = "rgmii-id"; + }; + ethernet@d0034000 { + status = "okay"; + phy = <&phy3>; + phy-mode = "rgmii-id"; + }; + + spi0: spi@d0010600 { + status = "okay"; + + spi-flash@0 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "n25q128a13"; + reg = <0>; /* Chip select 0 */ + spi-max-frequency = <108000000>; + }; + }; + }; +}; diff --git a/arch/arm/boot/dts/armada-xp-mv78230.dtsi b/arch/arm/boot/dts/armada-xp-mv78230.dtsi index e041f42ed71..f56c40599f5 100644 --- a/arch/arm/boot/dts/armada-xp-mv78230.dtsi +++ b/arch/arm/boot/dts/armada-xp-mv78230.dtsi @@ -47,6 +47,12 @@ pinctrl { compatible = "marvell,mv78230-pinctrl"; reg = <0xd0018000 0x38>; + + sdio_pins: sdio-pins { + marvell,pins = "mpp30", "mpp31", "mpp32", + "mpp33", "mpp34", "mpp35"; + marvell,function = "sd0"; + }; }; gpio0: gpio@d0018100 { diff --git a/arch/arm/boot/dts/armada-xp-mv78260.dtsi b/arch/arm/boot/dts/armada-xp-mv78260.dtsi index 9e23bd8c953..f8f2b787d2b 100644 --- a/arch/arm/boot/dts/armada-xp-mv78260.dtsi +++ b/arch/arm/boot/dts/armada-xp-mv78260.dtsi @@ -48,6 +48,12 @@ pinctrl { compatible = "marvell,mv78260-pinctrl"; reg = <0xd0018000 0x38>; + + sdio_pins: sdio-pins { + marvell,pins = "mpp30", "mpp31", "mpp32", + "mpp33", "mpp34", "mpp35"; + marvell,function = "sd0"; + }; }; gpio0: gpio@d0018100 { diff --git a/arch/arm/boot/dts/armada-xp-mv78460.dtsi b/arch/arm/boot/dts/armada-xp-mv78460.dtsi index 965966110e3..936c25dc32b 100644 --- a/arch/arm/boot/dts/armada-xp-mv78460.dtsi +++ b/arch/arm/boot/dts/armada-xp-mv78460.dtsi @@ -63,6 +63,12 @@ pinctrl { compatible = "marvell,mv78460-pinctrl"; reg = <0xd0018000 0x38>; + + sdio_pins: sdio-pins { + marvell,pins = "mpp30", "mpp31", "mpp32", + "mpp33", "mpp34", "mpp35"; + marvell,function = "sd0"; + }; }; gpio0: gpio@d0018100 { diff --git a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts index b42652fd3d8..3818a82176a 100644 --- a/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts +++ b/arch/arm/boot/dts/armada-xp-openblocks-ax3-4.dts @@ -66,6 +66,18 @@ }; }; + gpio_keys { + compatible = "gpio-keys"; + #address-cells = <1>; + #size-cells = <0>; + + button@1 { + label = "Init Button"; + linux,code = <116>; + gpios = <&gpio1 28 0>; + }; + }; + mdio { phy0: ethernet-phy@0 { reg = <0>; @@ -121,5 +133,11 @@ nr-ports = <2>; status = "okay"; }; + usb@d0050000 { + status = "okay"; + }; + usb@d0051000 { + status = "okay"; + }; }; }; diff --git a/arch/arm/boot/dts/armada-xp.dtsi b/arch/arm/boot/dts/armada-xp.dtsi index 2e37ef101c9..1443949c165 100644 --- a/arch/arm/boot/dts/armada-xp.dtsi +++ b/arch/arm/boot/dts/armada-xp.dtsi @@ -30,7 +30,7 @@ }; mpic: interrupt-controller@d0020000 { - reg = <0xd0020a00 0x1d0>, + reg = <0xd0020a00 0x2d0>, <0xd0021070 0x58>; }; @@ -134,5 +134,22 @@ dmacap,memset; }; }; + + usb@d0050000 { + clocks = <&gateclk 18>; + }; + + usb@d0051000 { + clocks = <&gateclk 19>; + }; + + usb@d0052000 { + compatible = "marvell,orion-ehci"; + reg = <0xd0052000 0x500>; + interrupts = <47>; + clocks = <&gateclk 20>; + status = "disabled"; + }; + }; }; diff --git a/arch/arm/boot/dts/dove-cubox.dts b/arch/arm/boot/dts/dove-cubox.dts index cdee96fca6e..7e3065abd75 100644 --- a/arch/arm/boot/dts/dove-cubox.dts +++ b/arch/arm/boot/dts/dove-cubox.dts @@ -17,12 +17,33 @@ leds { compatible = "gpio-leds"; + pinctrl-0 = <&pmx_gpio_18>; + pinctrl-names = "default"; + power { label = "Power"; gpios = <&gpio0 18 1>; linux,default-trigger = "default-on"; }; }; + + regulators { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <0>; + + usb_power: regulator@1 { + compatible = "regulator-fixed"; + reg = <1>; + regulator-name = "USB Power"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + enable-active-high; + regulator-always-on; + regulator-boot-on; + gpio = <&gpio0 1 0>; + }; + }; }; &uart0 { status = "okay"; }; @@ -47,9 +68,14 @@ }; &pinctrl { - pinctrl-0 = <&pmx_gpio_12 &pmx_gpio_18>; + pinctrl-0 = <&pmx_gpio_1 &pmx_gpio_12>; pinctrl-names = "default"; + pmx_gpio_1: pmx-gpio-1 { + marvell,pins = "mpp1"; + marvell,function = "gpio"; + }; + pmx_gpio_12: pmx-gpio-12 { marvell,pins = "mpp12"; marvell,function = "gpio"; diff --git a/arch/arm/boot/dts/dove.dtsi b/arch/arm/boot/dts/dove.dtsi index 740630f9cd6..67dbe20868a 100644 --- a/arch/arm/boot/dts/dove.dtsi +++ b/arch/arm/boot/dts/dove.dtsi @@ -55,7 +55,7 @@ reg = <0x12000 0x100>; reg-shift = <2>; interrupts = <7>; - clock-frequency = <166666667>; + clocks = <&core_clk 0>; status = "disabled"; }; @@ -64,7 +64,7 @@ reg = <0x12100 0x100>; reg-shift = <2>; interrupts = <8>; - clock-frequency = <166666667>; + clocks = <&core_clk 0>; status = "disabled"; }; @@ -73,7 +73,7 @@ reg = <0x12000 0x100>; reg-shift = <2>; interrupts = <9>; - clock-frequency = <166666667>; + clocks = <&core_clk 0>; status = "disabled"; }; @@ -82,7 +82,7 @@ reg = <0x12100 0x100>; reg-shift = <2>; interrupts = <10>; - clock-frequency = <166666667>; + clocks = <&core_clk 0>; status = "disabled"; }; @@ -156,6 +156,22 @@ status = "disabled"; }; + ehci0: usb-host@50000 { + compatible = "marvell,orion-ehci"; + reg = <0x50000 0x1000>; + interrupts = <24>; + clocks = <&gate_clk 0>; + status = "okay"; + }; + + ehci1: usb-host@51000 { + compatible = "marvell,orion-ehci"; + reg = <0x51000 0x1000>; + interrupts = <25>; + clocks = <&gate_clk 1>; + status = "okay"; + }; + sdio0: sdio@92000 { compatible = "marvell,dove-sdhci"; reg = <0x92000 0x100>; diff --git a/arch/arm/boot/dts/imx25-karo-tx25.dts b/arch/arm/boot/dts/imx25-karo-tx25.dts index d81f8a0b979..1a9d0491cdc 100644 --- a/arch/arm/boot/dts/imx25-karo-tx25.dts +++ b/arch/arm/boot/dts/imx25-karo-tx25.dts @@ -19,26 +19,18 @@ memory { reg = <0x80000000 0x02000000 0x90000000 0x02000000>; }; +}; - soc { - aips@43f00000 { - uart1: serial@43f90000 { - status = "okay"; - }; - }; +&uart1 { + status = "okay"; +}; - spba@50000000 { - fec: ethernet@50038000 { - status = "okay"; - phy-mode = "rmii"; - }; - }; +&fec { + phy-mode = "rmii"; + status = "okay"; +}; - emi@80000000 { - nand@bb000000 { - nand-on-flash-bbt; - status = "okay"; - }; - }; - }; +&nfc { + nand-on-flash-bbt; + status = "okay"; }; diff --git a/arch/arm/boot/dts/imx25-pdk.dts b/arch/arm/boot/dts/imx25-pdk.dts new file mode 100644 index 00000000000..a02a860afd1 --- /dev/null +++ b/arch/arm/boot/dts/imx25-pdk.dts @@ -0,0 +1,36 @@ +/* + * Copyright 2013 Freescale Semiconductor, Inc. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +/dts-v1/; +/include/ "imx25.dtsi" + +/ { + model = "Freescale i.MX25 Product Development Kit"; + compatible = "fsl,imx25-pdk", "fsl,imx25"; + + memory { + reg = <0x80000000 0x4000000>; + }; +}; + +&uart1 { + status = "okay"; +}; + +&fec { + phy-mode = "rmii"; + status = "okay"; +}; + +&nfc { + nand-on-flash-bbt; + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx25.dtsi b/arch/arm/boot/dts/imx25.dtsi index e1b13ebc96d..94f33059158 100644 --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@ -499,7 +499,7 @@ reg = <0x80000000 0x3b002000>; ranges; - nand@bb000000 { + nfc: nand@bb000000 { #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/imx27-apf27.dts b/arch/arm/boot/dts/imx27-apf27.dts index c0327c054de..b464c807d8d 100644 --- a/arch/arm/boot/dts/imx27-apf27.dts +++ b/arch/arm/boot/dts/imx27-apf27.dts @@ -32,58 +32,54 @@ clock-frequency = <0>; }; }; +}; - soc { - aipi@10000000 { - serial@1000a000 { - status = "okay"; - }; +&uart1 { + status = "okay"; +}; - ethernet@1002b000 { - status = "okay"; - }; - }; +&fec { + status = "okay"; +}; - nand@d8000000 { - status = "okay"; - nand-bus-width = <16>; - nand-ecc-mode = "hw"; - nand-on-flash-bbt; +&nfc { + status = "okay"; + nand-bus-width = <16>; + nand-ecc-mode = "hw"; + nand-on-flash-bbt; - partition@0 { - label = "u-boot"; - reg = <0x0 0x100000>; - }; + partition@0 { + label = "u-boot"; + reg = <0x0 0x100000>; + }; - partition@100000 { - label = "env"; - reg = <0x100000 0x80000>; - }; + partition@100000 { + label = "env"; + reg = <0x100000 0x80000>; + }; - partition@180000 { - label = "env2"; - reg = <0x180000 0x80000>; - }; + partition@180000 { + label = "env2"; + reg = <0x180000 0x80000>; + }; - partition@200000 { - label = "firmware"; - reg = <0x200000 0x80000>; - }; + partition@200000 { + label = "firmware"; + reg = <0x200000 0x80000>; + }; - partition@280000 { - label = "dtb"; - reg = <0x280000 0x80000>; - }; + partition@280000 { + label = "dtb"; + reg = <0x280000 0x80000>; + }; - partition@300000 { - label = "kernel"; - reg = <0x300000 0x500000>; - }; + partition@300000 { + label = "kernel"; + reg = <0x300000 0x500000>; + }; - partition@800000 { - label = "rootfs"; - reg = <0x800000 0xf800000>; - }; - }; + partition@800000 { + label = "rootfs"; + reg = <0x800000 0xf800000>; }; }; diff --git a/arch/arm/boot/dts/imx27-3ds.dts b/arch/arm/boot/dts/imx27-pdk.dts index fa04c7b18bc..41cd1105608 100644 --- a/arch/arm/boot/dts/imx27-3ds.dts +++ b/arch/arm/boot/dts/imx27-pdk.dts @@ -13,25 +13,19 @@ /include/ "imx27.dtsi" / { - model = "mx27_3ds"; - compatible = "freescale,imx27-3ds", "fsl,imx27"; + model = "Freescale i.MX27 Product Development Kit"; + compatible = "fsl,imx27-pdk", "fsl,imx27"; memory { reg = <0x0 0x0>; }; +}; - soc { - aipi@10000000 { /* aipi1 */ - uart1: serial@1000a000 { - fsl,uart-has-rtscts; - status = "okay"; - }; - }; +&uart1 { + fsl,uart-has-rtscts; + status = "okay"; +}; - aipi@10020000 { /* aipi2 */ - ethernet@1002b000 { - status = "okay"; - }; - }; - }; +&fec { + status = "okay"; }; diff --git a/arch/arm/boot/dts/imx31-bug.dts b/arch/arm/boot/dts/imx31-bug.dts index 7f67402328d..9ac6f6ba1d6 100644 --- a/arch/arm/boot/dts/imx31-bug.dts +++ b/arch/arm/boot/dts/imx31-bug.dts @@ -19,13 +19,9 @@ memory { reg = <0x80000000 0x8000000>; /* 128M */ }; +}; - soc { - aips@43f00000 { /* AIPS1 */ - uart5: serial@43fb4000 { - fsl,uart-has-rtscts; - status = "okay"; - }; - }; - }; +&uart5 { + fsl,uart-has-rtscts; + status = "okay"; }; diff --git a/arch/arm/boot/dts/imx51-apf51.dts b/arch/arm/boot/dts/imx51-apf51.dts new file mode 100644 index 00000000000..92d3a66a69e --- /dev/null +++ b/arch/arm/boot/dts/imx51-apf51.dts @@ -0,0 +1,52 @@ +/* + * Copyright 2012 Armadeus Systems - <support@armadeus.com> + * Copyright 2012 Laurent Cans <laurent.cans@gmail.com> + * + * Based on mx51-babbage.dts + * Copyright 2011 Freescale Semiconductor, Inc. + * Copyright 2011 Linaro Ltd. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +/dts-v1/; +/include/ "imx51.dtsi" + +/ { + model = "Armadeus Systems APF51 module"; + compatible = "armadeus,imx51-apf51", "fsl,imx51"; + + memory { + reg = <0x90000000 0x20000000>; + }; + + clocks { + ckih1 { + clock-frequency = <0>; + }; + + osc { + clock-frequency = <33554432>; + }; + }; +}; + +&fec { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_fec_2>; + phy-mode = "mii"; + phy-reset-gpios = <&gpio3 0 0>; + phy-reset-duration = <1>; + status = "okay"; +}; + +&uart3 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart3_2>; + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx51-babbage.dts b/arch/arm/boot/dts/imx51-babbage.dts index 567e7ee72f9..aab6e43219a 100644 --- a/arch/arm/boot/dts/imx51-babbage.dts +++ b/arch/arm/boot/dts/imx51-babbage.dts @@ -21,239 +21,20 @@ reg = <0x90000000 0x20000000>; }; - soc { - display@di0 { - compatible = "fsl,imx-parallel-display"; - crtcs = <&ipu 0>; - interface-pix-fmt = "rgb24"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_ipu_disp1_1>; - }; - - display@di1 { - compatible = "fsl,imx-parallel-display"; - crtcs = <&ipu 1>; - interface-pix-fmt = "rgb565"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_ipu_disp2_1>; - }; - - aips@70000000 { /* aips-1 */ - spba@70000000 { - esdhc@70004000 { /* ESDHC1 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_esdhc1_1>; - fsl,cd-controller; - fsl,wp-controller; - status = "okay"; - }; - - esdhc@70008000 { /* ESDHC2 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_esdhc2_1>; - cd-gpios = <&gpio1 6 0>; - wp-gpios = <&gpio1 5 0>; - status = "okay"; - }; - - uart3: serial@7000c000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart3_1>; - fsl,uart-has-rtscts; - status = "okay"; - }; - - ecspi@70010000 { /* ECSPI1 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_ecspi1_1>; - fsl,spi-num-chipselects = <2>; - cs-gpios = <&gpio4 24 0>, <&gpio4 25 0>; - status = "okay"; - - pmic: mc13892@0 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,mc13892"; - spi-max-frequency = <6000000>; - reg = <0>; - interrupt-parent = <&gpio1>; - interrupts = <8 0x4>; - - regulators { - sw1_reg: sw1 { - regulator-min-microvolt = <600000>; - regulator-max-microvolt = <1375000>; - regulator-boot-on; - regulator-always-on; - }; - - sw2_reg: sw2 { - regulator-min-microvolt = <900000>; - regulator-max-microvolt = <1850000>; - regulator-boot-on; - regulator-always-on; - }; - - sw3_reg: sw3 { - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1850000>; - regulator-boot-on; - regulator-always-on; - }; - - sw4_reg: sw4 { - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1850000>; - regulator-boot-on; - regulator-always-on; - }; - - vpll_reg: vpll { - regulator-min-microvolt = <1050000>; - regulator-max-microvolt = <1800000>; - regulator-boot-on; - regulator-always-on; - }; - - vdig_reg: vdig { - regulator-min-microvolt = <1650000>; - regulator-max-microvolt = <1650000>; - regulator-boot-on; - }; - - vsd_reg: vsd { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <3150000>; - }; - - vusb2_reg: vusb2 { - regulator-min-microvolt = <2400000>; - regulator-max-microvolt = <2775000>; - regulator-boot-on; - regulator-always-on; - }; - - vvideo_reg: vvideo { - regulator-min-microvolt = <2775000>; - regulator-max-microvolt = <2775000>; - }; - - vaudio_reg: vaudio { - regulator-min-microvolt = <2300000>; - regulator-max-microvolt = <3000000>; - }; - - vcam_reg: vcam { - regulator-min-microvolt = <2500000>; - regulator-max-microvolt = <3000000>; - }; - - vgen1_reg: vgen1 { - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <1200000>; - }; - - vgen2_reg: vgen2 { - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <3150000>; - regulator-always-on; - }; - - vgen3_reg: vgen3 { - regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <2900000>; - regulator-always-on; - }; - }; - }; - - flash: at45db321d@1 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "atmel,at45db321d", "atmel,at45", "atmel,dataflash"; - spi-max-frequency = <25000000>; - reg = <1>; - - partition@0 { - label = "U-Boot"; - reg = <0x0 0x40000>; - read-only; - }; - - partition@40000 { - label = "Kernel"; - reg = <0x40000 0x3c0000>; - }; - }; - }; - - ssi2: ssi@70014000 { - fsl,mode = "i2s-slave"; - status = "okay"; - }; - }; - - iomuxc@73fa8000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_hog>; - - hog { - pinctrl_hog: hoggrp { - fsl,pins = < - 694 0x20d5 /* MX51_PAD_GPIO1_0__SD1_CD */ - 697 0x20d5 /* MX51_PAD_GPIO1_1__SD1_WP */ - 737 0x100 /* MX51_PAD_GPIO1_5__GPIO1_5 */ - 740 0x100 /* MX51_PAD_GPIO1_6__GPIO1_6 */ - 121 0x5 /* MX51_PAD_EIM_A27__GPIO2_21 */ - 402 0x85 /* MX51_PAD_CSPI1_SS0__GPIO4_24 */ - 405 0x85 /* MX51_PAD_CSPI1_SS1__GPIO4_25 */ - >; - }; - }; - }; - - uart1: serial@73fbc000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart1_1>; - fsl,uart-has-rtscts; - status = "okay"; - }; - - uart2: serial@73fc0000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart2_1>; - status = "okay"; - }; - }; - - aips@80000000 { /* aips-2 */ - i2c@83fc4000 { /* I2C2 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_i2c2_1>; - status = "okay"; - - sgtl5000: codec@0a { - compatible = "fsl,sgtl5000"; - reg = <0x0a>; - clock-frequency = <26000000>; - VDDA-supply = <&vdig_reg>; - VDDIO-supply = <&vvideo_reg>; - }; - }; - - audmux@83fd0000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_audmux_1>; - status = "okay"; - }; + display@di0 { + compatible = "fsl,imx-parallel-display"; + crtcs = <&ipu 0>; + interface-pix-fmt = "rgb24"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_ipu_disp1_1>; + }; - ethernet@83fec000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_fec_1>; - phy-mode = "mii"; - status = "okay"; - }; - }; + display@di1 { + compatible = "fsl,imx-parallel-display"; + crtcs = <&ipu 1>; + interface-pix-fmt = "rgb565"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_ipu_disp2_1>; }; gpio-keys { @@ -281,3 +62,236 @@ mux-ext-port = <3>; }; }; + +&esdhc1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_esdhc1_1>; + fsl,cd-controller; + fsl,wp-controller; + status = "okay"; +}; + +&esdhc2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_esdhc2_1>; + cd-gpios = <&gpio1 6 0>; + wp-gpios = <&gpio1 5 0>; + status = "okay"; +}; + +&uart3 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart3_1>; + fsl,uart-has-rtscts; + status = "okay"; +}; + +&ecspi1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_ecspi1_1>; + fsl,spi-num-chipselects = <2>; + cs-gpios = <&gpio4 24 0>, <&gpio4 25 0>; + status = "okay"; + + pmic: mc13892@0 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,mc13892"; + spi-max-frequency = <6000000>; + reg = <0>; + interrupt-parent = <&gpio1>; + interrupts = <8 0x4>; + + regulators { + sw1_reg: sw1 { + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <1375000>; + regulator-boot-on; + regulator-always-on; + }; + + sw2_reg: sw2 { + regulator-min-microvolt = <900000>; + regulator-max-microvolt = <1850000>; + regulator-boot-on; + regulator-always-on; + }; + + sw3_reg: sw3 { + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1850000>; + regulator-boot-on; + regulator-always-on; + }; + + sw4_reg: sw4 { + regulator-min-microvolt = <1100000>; + regulator-max-microvolt = <1850000>; + regulator-boot-on; + regulator-always-on; + }; + + vpll_reg: vpll { + regulator-min-microvolt = <1050000>; + regulator-max-microvolt = <1800000>; + regulator-boot-on; + regulator-always-on; + }; + + vdig_reg: vdig { + regulator-min-microvolt = <1650000>; + regulator-max-microvolt = <1650000>; + regulator-boot-on; + }; + + vsd_reg: vsd { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <3150000>; + }; + + vusb2_reg: vusb2 { + regulator-min-microvolt = <2400000>; + regulator-max-microvolt = <2775000>; + regulator-boot-on; + regulator-always-on; + }; + + vvideo_reg: vvideo { + regulator-min-microvolt = <2775000>; + regulator-max-microvolt = <2775000>; + }; + + vaudio_reg: vaudio { + regulator-min-microvolt = <2300000>; + regulator-max-microvolt = <3000000>; + }; + + vcam_reg: vcam { + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <3000000>; + }; + + vgen1_reg: vgen1 { + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; + }; + + vgen2_reg: vgen2 { + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3150000>; + regulator-always-on; + }; + + vgen3_reg: vgen3 { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <2900000>; + regulator-always-on; + }; + }; + }; + + flash: at45db321d@1 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "atmel,at45db321d", "atmel,at45", "atmel,dataflash"; + spi-max-frequency = <25000000>; + reg = <1>; + + partition@0 { + label = "U-Boot"; + reg = <0x0 0x40000>; + read-only; + }; + + partition@40000 { + label = "Kernel"; + reg = <0x40000 0x3c0000>; + }; + }; +}; + +&ssi2 { + fsl,mode = "i2s-slave"; + status = "okay"; +}; + +&iomuxc { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hog>; + + hog { + pinctrl_hog: hoggrp { + fsl,pins = < + 694 0x20d5 /* MX51_PAD_GPIO1_0__SD1_CD */ + 697 0x20d5 /* MX51_PAD_GPIO1_1__SD1_WP */ + 737 0x100 /* MX51_PAD_GPIO1_5__GPIO1_5 */ + 740 0x100 /* MX51_PAD_GPIO1_6__GPIO1_6 */ + 121 0x5 /* MX51_PAD_EIM_A27__GPIO2_21 */ + 402 0x85 /* MX51_PAD_CSPI1_SS0__GPIO4_24 */ + 405 0x85 /* MX51_PAD_CSPI1_SS1__GPIO4_25 */ + >; + }; + }; +}; + +&uart1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart1_1>; + fsl,uart-has-rtscts; + status = "okay"; +}; + +&uart2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart2_1>; + status = "okay"; +}; + +&i2c2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c2_1>; + status = "okay"; + + sgtl5000: codec@0a { + compatible = "fsl,sgtl5000"; + reg = <0x0a>; + clock-frequency = <26000000>; + VDDA-supply = <&vdig_reg>; + VDDIO-supply = <&vvideo_reg>; + }; +}; + +&audmux { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_audmux_1>; + status = "okay"; +}; + +&fec { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_fec_1>; + phy-mode = "mii"; + status = "okay"; +}; + +&kpp { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_kpp_1>; + linux,keymap = <0x00000067 /* KEY_UP */ + 0x0001006c /* KEY_DOWN */ + 0x00020072 /* KEY_VOLUMEDOWN */ + 0x00030066 /* KEY_HOME */ + 0x0100006a /* KEY_RIGHT */ + 0x01010069 /* KEY_LEFT */ + 0x0102001c /* KEY_ENTER */ + 0x01030073 /* KEY_VOLUMEUP */ + 0x02000040 /* KEY_F6 */ + 0x02010042 /* KEY_F8 */ + 0x02020043 /* KEY_F9 */ + 0x02030044 /* KEY_F10 */ + 0x0300003b /* KEY_F1 */ + 0x0301003c /* KEY_F2 */ + 0x0302003d /* KEY_F3 */ + 0x03030074>; /* KEY_POWER */ + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx51.dtsi b/arch/arm/boot/dts/imx51.dtsi index 1f5d45eff45..fcf035bf7c5 100644 --- a/arch/arm/boot/dts/imx51.dtsi +++ b/arch/arm/boot/dts/imx51.dtsi @@ -221,6 +221,14 @@ #interrupt-cells = <2>; }; + kpp: kpp@73f94000 { + compatible = "fsl,imx51-kpp", "fsl,imx21-kpp"; + reg = <0x73f94000 0x4000>; + interrupts = <60>; + clocks = <&clks 0>; + status = "disabled"; + }; + wdog1: wdog@73f98000 { compatible = "fsl,imx51-wdt", "fsl,imx21-wdt"; reg = <0x73f98000 0x4000>; @@ -273,6 +281,29 @@ 260 0x80000000 /* MX51_PAD_NANDF_RDY_INT__FEC_TX_CLK */ >; }; + + pinctrl_fec_2: fecgrp-2 { + fsl,pins = < + 589 0x80000000 /* MX51_PAD_DI_GP3__FEC_TX_ER */ + 592 0x80000000 /* MX51_PAD_DI2_PIN4__FEC_CRS */ + 594 0x80000000 /* MX51_PAD_DI2_PIN2__FEC_MDC */ + 596 0x80000000 /* MX51_PAD_DI2_PIN3__FEC_MDIO */ + 598 0x80000000 /* MX51_PAD_DI2_DISP_CLK__FEC_RDATA1 */ + 602 0x80000000 /* MX51_PAD_DI_GP4__FEC_RDATA2 */ + 604 0x80000000 /* MX51_PAD_DISP2_DAT0__FEC_RDATA3 */ + 609 0x80000000 /* MX51_PAD_DISP2_DAT1__FEC_RX_ER */ + 618 0x80000000 /* MX51_PAD_DISP2_DAT6__FEC_TDATA1 */ + 623 0x80000000 /* MX51_PAD_DISP2_DAT7__FEC_TDATA2 */ + 628 0x80000000 /* MX51_PAD_DISP2_DAT8__FEC_TDATA3 */ + 634 0x80000000 /* MX51_PAD_DISP2_DAT9__FEC_TX_EN */ + 639 0x80000000 /* MX51_PAD_DISP2_DAT10__FEC_COL */ + 644 0x80000000 /* MX51_PAD_DISP2_DAT11__FEC_RX_CLK */ + 649 0x80000000 /* MX51_PAD_DISP2_DAT12__FEC_RX_DV */ + 653 0x80000000 /* MX51_PAD_DISP2_DAT13__FEC_TX_CLK */ + 657 0x80000000 /* MX51_PAD_DISP2_DAT14__FEC_RDATA0 */ + 662 0x80000000 /* MX51_PAD_DISP2_DAT15__FEC_TDATA0 */ + >; + }; }; ecspi1 { @@ -409,6 +440,28 @@ 49 0x1c5 /* MX51_PAD_EIM_D24__UART3_CTS */ >; }; + + pinctrl_uart3_2: uart3grp-2 { + fsl,pins = < + 434 0x1c5 /* MX51_PAD_UART3_RXD__UART3_RXD */ + 430 0x1c5 /* MX51_PAD_UART3_TXD__UART3_TXD */ + >; + }; + }; + + kpp { + pinctrl_kpp_1: kppgrp-1 { + fsl,pins = < + 438 0xe0 /* MX51_PAD_KEY_ROW0__KEY_ROW0 */ + 439 0xe0 /* MX51_PAD_KEY_ROW1__KEY_ROW1 */ + 440 0xe0 /* MX51_PAD_KEY_ROW2__KEY_ROW2 */ + 441 0xe0 /* MX51_PAD_KEY_ROW3__KEY_ROW3 */ + 442 0xe8 /* MX51_PAD_KEY_COL0__KEY_COL0 */ + 444 0xe8 /* MX51_PAD_KEY_COL1__KEY_COL1 */ + 446 0xe8 /* MX51_PAD_KEY_COL2__KEY_COL2 */ + 448 0xe8 /* MX51_PAD_KEY_COL3__KEY_COL3 */ + >; + }; }; }; diff --git a/arch/arm/boot/dts/imx53-ard.dts b/arch/arm/boot/dts/imx53-ard.dts index 4be76f22352..e049fd0319e 100644 --- a/arch/arm/boot/dts/imx53-ard.dts +++ b/arch/arm/boot/dts/imx53-ard.dts @@ -21,72 +21,6 @@ reg = <0x70000000 0x40000000>; }; - soc { - aips@50000000 { /* AIPS1 */ - spba@50000000 { - esdhc@50004000 { /* ESDHC1 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_esdhc1_2>; - cd-gpios = <&gpio1 1 0>; - wp-gpios = <&gpio1 9 0>; - status = "okay"; - }; - }; - - iomuxc@53fa8000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_hog>; - - hog { - pinctrl_hog: hoggrp { - fsl,pins = < - 1077 0x80000000 /* MX53_PAD_GPIO_1__GPIO1_1 */ - 1085 0x80000000 /* MX53_PAD_GPIO_9__GPIO1_9 */ - 486 0x80000000 /* MX53_PAD_EIM_EB3__GPIO2_31 */ - 739 0x80000000 /* MX53_PAD_GPIO_10__GPIO4_0 */ - 218 0x80000000 /* MX53_PAD_DISP0_DAT16__GPIO5_10 */ - 226 0x80000000 /* MX53_PAD_DISP0_DAT17__GPIO5_11 */ - 233 0x80000000 /* MX53_PAD_DISP0_DAT18__GPIO5_12 */ - 241 0x80000000 /* MX53_PAD_DISP0_DAT19__GPIO5_13 */ - 429 0x80000000 /* MX53_PAD_EIM_D16__EMI_WEIM_D_16 */ - 435 0x80000000 /* MX53_PAD_EIM_D17__EMI_WEIM_D_17 */ - 441 0x80000000 /* MX53_PAD_EIM_D18__EMI_WEIM_D_18 */ - 448 0x80000000 /* MX53_PAD_EIM_D19__EMI_WEIM_D_19 */ - 456 0x80000000 /* MX53_PAD_EIM_D20__EMI_WEIM_D_20 */ - 464 0x80000000 /* MX53_PAD_EIM_D21__EMI_WEIM_D_21 */ - 471 0x80000000 /* MX53_PAD_EIM_D22__EMI_WEIM_D_22 */ - 477 0x80000000 /* MX53_PAD_EIM_D23__EMI_WEIM_D_23 */ - 492 0x80000000 /* MX53_PAD_EIM_D24__EMI_WEIM_D_24 */ - 500 0x80000000 /* MX53_PAD_EIM_D25__EMI_WEIM_D_25 */ - 508 0x80000000 /* MX53_PAD_EIM_D26__EMI_WEIM_D_26 */ - 516 0x80000000 /* MX53_PAD_EIM_D27__EMI_WEIM_D_27 */ - 524 0x80000000 /* MX53_PAD_EIM_D28__EMI_WEIM_D_28 */ - 532 0x80000000 /* MX53_PAD_EIM_D29__EMI_WEIM_D_29 */ - 540 0x80000000 /* MX53_PAD_EIM_D30__EMI_WEIM_D_30 */ - 548 0x80000000 /* MX53_PAD_EIM_D31__EMI_WEIM_D_31 */ - 637 0x80000000 /* MX53_PAD_EIM_DA0__EMI_NAND_WEIM_DA_0 */ - 642 0x80000000 /* MX53_PAD_EIM_DA1__EMI_NAND_WEIM_DA_1 */ - 647 0x80000000 /* MX53_PAD_EIM_DA2__EMI_NAND_WEIM_DA_2 */ - 652 0x80000000 /* MX53_PAD_EIM_DA3__EMI_NAND_WEIM_DA_3 */ - 657 0x80000000 /* MX53_PAD_EIM_DA4__EMI_NAND_WEIM_DA_4 */ - 662 0x80000000 /* MX53_PAD_EIM_DA5__EMI_NAND_WEIM_DA_5 */ - 667 0x80000000 /* MX53_PAD_EIM_DA6__EMI_NAND_WEIM_DA_6 */ - 611 0x80000000 /* MX53_PAD_EIM_OE__EMI_WEIM_OE */ - 616 0x80000000 /* MX53_PAD_EIM_RW__EMI_WEIM_RW */ - 607 0x80000000 /* MX53_PAD_EIM_CS1__EMI_WEIM_CS_1 */ - >; - }; - }; - }; - - uart1: serial@53fbc000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart1_2>; - status = "okay"; - }; - }; - }; - eim-cs1@f4000000 { #address-cells = <1>; #size-cells = <1>; @@ -162,3 +96,63 @@ }; }; }; + +&esdhc1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_esdhc1_2>; + cd-gpios = <&gpio1 1 0>; + wp-gpios = <&gpio1 9 0>; + status = "okay"; +}; + +&iomuxc { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hog>; + + hog { + pinctrl_hog: hoggrp { + fsl,pins = < + 1077 0x80000000 /* MX53_PAD_GPIO_1__GPIO1_1 */ + 1085 0x80000000 /* MX53_PAD_GPIO_9__GPIO1_9 */ + 486 0x80000000 /* MX53_PAD_EIM_EB3__GPIO2_31 */ + 739 0x80000000 /* MX53_PAD_GPIO_10__GPIO4_0 */ + 218 0x80000000 /* MX53_PAD_DISP0_DAT16__GPIO5_10 */ + 226 0x80000000 /* MX53_PAD_DISP0_DAT17__GPIO5_11 */ + 233 0x80000000 /* MX53_PAD_DISP0_DAT18__GPIO5_12 */ + 241 0x80000000 /* MX53_PAD_DISP0_DAT19__GPIO5_13 */ + 429 0x80000000 /* MX53_PAD_EIM_D16__EMI_WEIM_D_16 */ + 435 0x80000000 /* MX53_PAD_EIM_D17__EMI_WEIM_D_17 */ + 441 0x80000000 /* MX53_PAD_EIM_D18__EMI_WEIM_D_18 */ + 448 0x80000000 /* MX53_PAD_EIM_D19__EMI_WEIM_D_19 */ + 456 0x80000000 /* MX53_PAD_EIM_D20__EMI_WEIM_D_20 */ + 464 0x80000000 /* MX53_PAD_EIM_D21__EMI_WEIM_D_21 */ + 471 0x80000000 /* MX53_PAD_EIM_D22__EMI_WEIM_D_22 */ + 477 0x80000000 /* MX53_PAD_EIM_D23__EMI_WEIM_D_23 */ + 492 0x80000000 /* MX53_PAD_EIM_D24__EMI_WEIM_D_24 */ + 500 0x80000000 /* MX53_PAD_EIM_D25__EMI_WEIM_D_25 */ + 508 0x80000000 /* MX53_PAD_EIM_D26__EMI_WEIM_D_26 */ + 516 0x80000000 /* MX53_PAD_EIM_D27__EMI_WEIM_D_27 */ + 524 0x80000000 /* MX53_PAD_EIM_D28__EMI_WEIM_D_28 */ + 532 0x80000000 /* MX53_PAD_EIM_D29__EMI_WEIM_D_29 */ + 540 0x80000000 /* MX53_PAD_EIM_D30__EMI_WEIM_D_30 */ + 548 0x80000000 /* MX53_PAD_EIM_D31__EMI_WEIM_D_31 */ + 637 0x80000000 /* MX53_PAD_EIM_DA0__EMI_NAND_WEIM_DA_0 */ + 642 0x80000000 /* MX53_PAD_EIM_DA1__EMI_NAND_WEIM_DA_1 */ + 647 0x80000000 /* MX53_PAD_EIM_DA2__EMI_NAND_WEIM_DA_2 */ + 652 0x80000000 /* MX53_PAD_EIM_DA3__EMI_NAND_WEIM_DA_3 */ + 657 0x80000000 /* MX53_PAD_EIM_DA4__EMI_NAND_WEIM_DA_4 */ + 662 0x80000000 /* MX53_PAD_EIM_DA5__EMI_NAND_WEIM_DA_5 */ + 667 0x80000000 /* MX53_PAD_EIM_DA6__EMI_NAND_WEIM_DA_6 */ + 611 0x80000000 /* MX53_PAD_EIM_OE__EMI_WEIM_OE */ + 616 0x80000000 /* MX53_PAD_EIM_RW__EMI_WEIM_RW */ + 607 0x80000000 /* MX53_PAD_EIM_CS1__EMI_WEIM_CS_1 */ + >; + }; + }; +}; + +&uart1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart1_2>; + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx53-evk.dts b/arch/arm/boot/dts/imx53-evk.dts index a124d1e2525..85a89b52f9b 100644 --- a/arch/arm/boot/dts/imx53-evk.dts +++ b/arch/arm/boot/dts/imx53-evk.dts @@ -21,107 +21,6 @@ reg = <0x70000000 0x80000000>; }; - soc { - aips@50000000 { /* AIPS1 */ - spba@50000000 { - esdhc@50004000 { /* ESDHC1 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_esdhc1_1>; - cd-gpios = <&gpio3 13 0>; - wp-gpios = <&gpio3 14 0>; - status = "okay"; - }; - - ecspi@50010000 { /* ECSPI1 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_ecspi1_1>; - fsl,spi-num-chipselects = <2>; - cs-gpios = <&gpio2 30 0>, <&gpio3 19 0>; - status = "okay"; - - flash: at45db321d@1 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "atmel,at45db321d", "atmel,at45", "atmel,dataflash"; - spi-max-frequency = <25000000>; - reg = <1>; - - partition@0 { - label = "U-Boot"; - reg = <0x0 0x40000>; - read-only; - }; - - partition@40000 { - label = "Kernel"; - reg = <0x40000 0x3c0000>; - }; - }; - }; - - esdhc@50020000 { /* ESDHC3 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_esdhc3_1>; - cd-gpios = <&gpio3 11 0>; - wp-gpios = <&gpio3 12 0>; - status = "okay"; - }; - }; - - iomuxc@53fa8000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_hog>; - - hog { - pinctrl_hog: hoggrp { - fsl,pins = < - 424 0x80000000 /* MX53_PAD_EIM_EB2__GPIO2_30 */ - 449 0x80000000 /* MX53_PAD_EIM_D19__GPIO3_19 */ - 693 0x80000000 /* MX53_PAD_EIM_DA11__GPIO3_11 */ - 697 0x80000000 /* MX53_PAD_EIM_DA12__GPIO3_12 */ - 701 0x80000000 /* MX53_PAD_EIM_DA13__GPIO3_13 */ - 705 0x80000000 /* MX53_PAD_EIM_DA14__GPIO3_14 */ - 868 0x80000000 /* MX53_PAD_PATA_DA_0__GPIO7_6 */ - 873 0x80000000 /* MX53_PAD_PATA_DA_1__GPIO7_7 */ - >; - }; - }; - }; - - uart1: serial@53fbc000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart1_1>; - status = "okay"; - }; - }; - - aips@60000000 { /* AIPS2 */ - i2c@63fc4000 { /* I2C2 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_i2c2_1>; - status = "okay"; - - pmic: mc13892@08 { - compatible = "fsl,mc13892", "fsl,mc13xxx"; - reg = <0x08>; - }; - - codec: sgtl5000@0a { - compatible = "fsl,sgtl5000"; - reg = <0x0a>; - }; - }; - - ethernet@63fec000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_fec_1>; - phy-mode = "rmii"; - phy-reset-gpios = <&gpio7 6 0>; - status = "okay"; - }; - }; - }; - leds { compatible = "gpio-leds"; @@ -132,3 +31,96 @@ }; }; }; + +&esdhc1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_esdhc1_1>; + cd-gpios = <&gpio3 13 0>; + wp-gpios = <&gpio3 14 0>; + status = "okay"; +}; + +&ecspi1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_ecspi1_1>; + fsl,spi-num-chipselects = <2>; + cs-gpios = <&gpio2 30 0>, <&gpio3 19 0>; + status = "okay"; + + flash: at45db321d@1 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "atmel,at45db321d", "atmel,at45", "atmel,dataflash"; + spi-max-frequency = <25000000>; + reg = <1>; + + partition@0 { + label = "U-Boot"; + reg = <0x0 0x40000>; + read-only; + }; + + partition@40000 { + label = "Kernel"; + reg = <0x40000 0x3c0000>; + }; + }; +}; + +&esdhc3 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_esdhc3_1>; + cd-gpios = <&gpio3 11 0>; + wp-gpios = <&gpio3 12 0>; + status = "okay"; +}; + +&iomuxc { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hog>; + + hog { + pinctrl_hog: hoggrp { + fsl,pins = < + 424 0x80000000 /* MX53_PAD_EIM_EB2__GPIO2_30 */ + 449 0x80000000 /* MX53_PAD_EIM_D19__GPIO3_19 */ + 693 0x80000000 /* MX53_PAD_EIM_DA11__GPIO3_11 */ + 697 0x80000000 /* MX53_PAD_EIM_DA12__GPIO3_12 */ + 701 0x80000000 /* MX53_PAD_EIM_DA13__GPIO3_13 */ + 705 0x80000000 /* MX53_PAD_EIM_DA14__GPIO3_14 */ + 868 0x80000000 /* MX53_PAD_PATA_DA_0__GPIO7_6 */ + 873 0x80000000 /* MX53_PAD_PATA_DA_1__GPIO7_7 */ + >; + }; + }; +}; + +&uart1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart1_1>; + status = "okay"; +}; + +&i2c2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c2_1>; + status = "okay"; + + pmic: mc13892@08 { + compatible = "fsl,mc13892", "fsl,mc13xxx"; + reg = <0x08>; + }; + + codec: sgtl5000@0a { + compatible = "fsl,sgtl5000"; + reg = <0x0a>; + }; +}; + +&fec { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_fec_1>; + phy-mode = "rmii"; + phy-reset-gpios = <&gpio7 6 0>; + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx53-mba53.dts b/arch/arm/boot/dts/imx53-mba53.dts new file mode 100644 index 00000000000..e54fffd4836 --- /dev/null +++ b/arch/arm/boot/dts/imx53-mba53.dts @@ -0,0 +1,130 @@ +/* + * Copyright 2012 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix + * Copyright 2012 Steffen Trumtrar <s.trumtrar@pengutronix.de>, Pengutronix + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +/dts-v1/; +/include/ "imx53-tqma53.dtsi" + +/ { + model = "TQ MBa53 starter kit"; + compatible = "tq,mba53", "tq,tqma53", "fsl,imx53"; +}; + +&iomuxc { + lvds1 { + pinctrl_lvds1_1: lvds1-grp1 { + fsl,pins = <730 0x10000 /* LVDS0_TX3 */ + 732 0x10000 /* LVDS0_CLK */ + 734 0x10000 /* LVDS0_TX2 */ + 736 0x10000 /* LVDS0_TX1 */ + 738 0x10000>; /* LVDS0_TX0 */ + }; + + pinctrl_lvds1_2: lvds1-grp2 { + fsl,pins = <720 0x10000 /* LVDS1_TX3 */ + 722 0x10000 /* LVDS1_TX2 */ + 724 0x10000 /* LVDS1_CLK */ + 726 0x10000 /* LVDS1_TX1 */ + 728 0x10000>; /* LVDS1_TX0 */ + }; + }; + + disp1 { + pinctrl_disp1_1: disp1-grp1 { + fsl,pins = <689 0x10000 /* DISP1_DRDY */ + 482 0x10000 /* DISP1_HSYNC */ + 489 0x10000 /* DISP1_VSYNC */ + 684 0x10000 /* DISP1_DAT_0 */ + 515 0x10000 /* DISP1_DAT_22 */ + 523 0x10000 /* DISP1_DAT_23 */ + 543 0x10000 /* DISP1_DAT_21 */ + 553 0x10000 /* DISP1_DAT_20 */ + 558 0x10000 /* DISP1_DAT_19 */ + 564 0x10000 /* DISP1_DAT_18 */ + 570 0x10000 /* DISP1_DAT_17 */ + 575 0x10000 /* DISP1_DAT_16 */ + 580 0x10000 /* DISP1_DAT_15 */ + 585 0x10000 /* DISP1_DAT_14 */ + 590 0x10000 /* DISP1_DAT_13 */ + 595 0x10000 /* DISP1_DAT_12 */ + 628 0x10000 /* DISP1_DAT_11 */ + 634 0x10000 /* DISP1_DAT_10 */ + 639 0x10000 /* DISP1_DAT_9 */ + 644 0x10000 /* DISP1_DAT_8 */ + 649 0x10000 /* DISP1_DAT_7 */ + 654 0x10000 /* DISP1_DAT_6 */ + 659 0x10000 /* DISP1_DAT_5 */ + 664 0x10000 /* DISP1_DAT_4 */ + 669 0x10000 /* DISP1_DAT_3 */ + 674 0x10000 /* DISP1_DAT_2 */ + 679 0x10000 /* DISP1_DAT_1 */ + 684 0x10000>; /* DISP1_DAT_0 */ + }; + }; +}; + +&cspi { + status = "okay"; +}; + +&i2c2 { + codec: sgtl5000@a { + compatible = "fsl,sgtl5000"; + reg = <0x0a>; + }; + + expander: pca9554@20 { + compatible = "pca9554"; + reg = <0x20>; + interrupts = <109>; + }; + + sensor2: lm75@49 { + compatible = "lm75"; + reg = <0x49>; + }; +}; + +&fec { + status = "okay"; +}; + +&esdhc2 { + status = "okay"; +}; + +&uart3 { + status = "okay"; +}; + +&ecspi1 { + status = "okay"; +}; + +&uart1 { + status = "okay"; +}; + +&uart2 { + status = "okay"; +}; + +&can1 { + status = "okay"; +}; + +&can2 { + status = "okay"; +}; + +&i2c3 { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx53-qsb.dts b/arch/arm/boot/dts/imx53-qsb.dts index b0075537195..05cc5620436 100644 --- a/arch/arm/boot/dts/imx53-qsb.dts +++ b/arch/arm/boot/dts/imx53-qsb.dts @@ -21,200 +21,6 @@ reg = <0x70000000 0x40000000>; }; - soc { - aips@50000000 { /* AIPS1 */ - spba@50000000 { - esdhc@50004000 { /* ESDHC1 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_esdhc1_1>; - cd-gpios = <&gpio3 13 0>; - status = "okay"; - }; - - ssi2: ssi@50014000 { - fsl,mode = "i2s-slave"; - status = "okay"; - }; - - esdhc@50020000 { /* ESDHC3 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_esdhc3_1>; - cd-gpios = <&gpio3 11 0>; - wp-gpios = <&gpio3 12 0>; - status = "okay"; - }; - }; - - iomuxc@53fa8000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_hog>; - - hog { - pinctrl_hog: hoggrp { - fsl,pins = < - 1071 0x80000000 /* MX53_PAD_GPIO_0__CCM_SSI_EXT1_CLK */ - 1141 0x80000000 /* MX53_PAD_GPIO_8__GPIO1_8 */ - 982 0x80000000 /* MX53_PAD_PATA_DATA14__GPIO2_14 */ - 989 0x80000000 /* MX53_PAD_PATA_DATA15__GPIO2_15 */ - 693 0x80000000 /* MX53_PAD_EIM_DA11__GPIO3_11 */ - 697 0x80000000 /* MX53_PAD_EIM_DA12__GPIO3_12 */ - 701 0x80000000 /* MX53_PAD_EIM_DA13__GPIO3_13 */ - 868 0x80000000 /* MX53_PAD_PATA_DA_0__GPIO7_6 */ - 1149 0x80000000 /* MX53_PAD_GPIO_16__GPIO7_11 */ - >; - }; - - led_pin_gpio7_7: led_gpio7_7@0 { - fsl,pins = < - 873 0x80000000 /* MX53_PAD_PATA_DA_1__GPIO7_7 */ - >; - }; - }; - - }; - - uart1: serial@53fbc000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart1_1>; - status = "okay"; - }; - }; - - aips@60000000 { /* AIPS2 */ - i2c@63fc4000 { /* I2C2 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_i2c2_1>; - status = "okay"; - - sgtl5000: codec@0a { - compatible = "fsl,sgtl5000"; - reg = <0x0a>; - VDDA-supply = <®_3p2v>; - VDDIO-supply = <®_3p2v>; - }; - }; - - i2c@63fc8000 { /* I2C1 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_i2c1_1>; - status = "okay"; - - accelerometer: mma8450@1c { - compatible = "fsl,mma8450"; - reg = <0x1c>; - }; - - pmic: dialog@48 { - compatible = "dlg,da9053-aa", "dlg,da9052"; - reg = <0x48>; - interrupt-parent = <&gpio7>; - interrupts = <11 0x8>; /* low-level active IRQ at GPIO7_11 */ - - regulators { - buck1_reg: buck1 { - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <2075000>; - regulator-always-on; - }; - - buck2_reg: buck2 { - regulator-min-microvolt = <500000>; - regulator-max-microvolt = <2075000>; - regulator-always-on; - }; - - buck3_reg: buck3 { - regulator-min-microvolt = <925000>; - regulator-max-microvolt = <2500000>; - regulator-always-on; - }; - - buck4_reg: buck4 { - regulator-min-microvolt = <925000>; - regulator-max-microvolt = <2500000>; - regulator-always-on; - }; - - ldo1_reg: ldo1 { - regulator-min-microvolt = <600000>; - regulator-max-microvolt = <1800000>; - regulator-boot-on; - regulator-always-on; - }; - - ldo2_reg: ldo2 { - regulator-min-microvolt = <600000>; - regulator-max-microvolt = <1800000>; - regulator-always-on; - }; - - ldo3_reg: ldo3 { - regulator-min-microvolt = <600000>; - regulator-max-microvolt = <1800000>; - regulator-always-on; - }; - - ldo4_reg: ldo4 { - regulator-min-microvolt = <1725000>; - regulator-max-microvolt = <3300000>; - regulator-always-on; - }; - - ldo5_reg: ldo5 { - regulator-min-microvolt = <1725000>; - regulator-max-microvolt = <3300000>; - regulator-always-on; - }; - - ldo6_reg: ldo6 { - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <3600000>; - regulator-always-on; - }; - - ldo7_reg: ldo7 { - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <3600000>; - regulator-always-on; - }; - - ldo8_reg: ldo8 { - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <3600000>; - regulator-always-on; - }; - - ldo9_reg: ldo9 { - regulator-min-microvolt = <1200000>; - regulator-max-microvolt = <3600000>; - regulator-always-on; - }; - - ldo10_reg: ldo10 { - regulator-min-microvolt = <1250000>; - regulator-max-microvolt = <3650000>; - regulator-always-on; - }; - }; - }; - }; - - audmux@63fd0000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_audmux_1>; - status = "okay"; - }; - - ethernet@63fec000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_fec_1>; - phy-mode = "rmii"; - phy-reset-gpios = <&gpio7 6 0>; - status = "okay"; - }; - }; - }; - gpio-keys { compatible = "gpio-keys"; @@ -276,3 +82,189 @@ mux-ext-port = <5>; }; }; + +&esdhc1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_esdhc1_1>; + cd-gpios = <&gpio3 13 0>; + status = "okay"; +}; + +&ssi2 { + fsl,mode = "i2s-slave"; + status = "okay"; +}; + +&esdhc3 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_esdhc3_1>; + cd-gpios = <&gpio3 11 0>; + wp-gpios = <&gpio3 12 0>; + status = "okay"; +}; + +&iomuxc { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hog>; + + hog { + pinctrl_hog: hoggrp { + fsl,pins = < + 1071 0x80000000 /* MX53_PAD_GPIO_0__CCM_SSI_EXT1_CLK */ + 1141 0x80000000 /* MX53_PAD_GPIO_8__GPIO1_8 */ + 982 0x80000000 /* MX53_PAD_PATA_DATA14__GPIO2_14 */ + 989 0x80000000 /* MX53_PAD_PATA_DATA15__GPIO2_15 */ + 693 0x80000000 /* MX53_PAD_EIM_DA11__GPIO3_11 */ + 697 0x80000000 /* MX53_PAD_EIM_DA12__GPIO3_12 */ + 701 0x80000000 /* MX53_PAD_EIM_DA13__GPIO3_13 */ + 868 0x80000000 /* MX53_PAD_PATA_DA_0__GPIO7_6 */ + 1149 0x80000000 /* MX53_PAD_GPIO_16__GPIO7_11 */ + >; + }; + + led_pin_gpio7_7: led_gpio7_7@0 { + fsl,pins = < + 873 0x80000000 /* MX53_PAD_PATA_DA_1__GPIO7_7 */ + >; + }; + }; + +}; + +&uart1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart1_1>; + status = "okay"; +}; + +&i2c2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c2_1>; + status = "okay"; + + sgtl5000: codec@0a { + compatible = "fsl,sgtl5000"; + reg = <0x0a>; + VDDA-supply = <®_3p2v>; + VDDIO-supply = <®_3p2v>; + }; +}; + +&i2c1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c1_1>; + status = "okay"; + + accelerometer: mma8450@1c { + compatible = "fsl,mma8450"; + reg = <0x1c>; + }; + + pmic: dialog@48 { + compatible = "dlg,da9053-aa", "dlg,da9052"; + reg = <0x48>; + interrupt-parent = <&gpio7>; + interrupts = <11 0x8>; /* low-level active IRQ at GPIO7_11 */ + + regulators { + buck1_reg: buck1 { + regulator-min-microvolt = <500000>; + regulator-max-microvolt = <2075000>; + regulator-always-on; + }; + + buck2_reg: buck2 { + regulator-min-microvolt = <500000>; + regulator-max-microvolt = <2075000>; + regulator-always-on; + }; + + buck3_reg: buck3 { + regulator-min-microvolt = <925000>; + regulator-max-microvolt = <2500000>; + regulator-always-on; + }; + + buck4_reg: buck4 { + regulator-min-microvolt = <925000>; + regulator-max-microvolt = <2500000>; + regulator-always-on; + }; + + ldo1_reg: ldo1 { + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <1800000>; + regulator-boot-on; + regulator-always-on; + }; + + ldo2_reg: ldo2 { + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + }; + + ldo3_reg: ldo3 { + regulator-min-microvolt = <600000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + }; + + ldo4_reg: ldo4 { + regulator-min-microvolt = <1725000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + ldo5_reg: ldo5 { + regulator-min-microvolt = <1725000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + ldo6_reg: ldo6 { + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3600000>; + regulator-always-on; + }; + + ldo7_reg: ldo7 { + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3600000>; + regulator-always-on; + }; + + ldo8_reg: ldo8 { + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3600000>; + regulator-always-on; + }; + + ldo9_reg: ldo9 { + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <3600000>; + regulator-always-on; + }; + + ldo10_reg: ldo10 { + regulator-min-microvolt = <1250000>; + regulator-max-microvolt = <3650000>; + regulator-always-on; + }; + }; + }; +}; + +&audmux { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_audmux_1>; + status = "okay"; +}; + +&fec { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_fec_1>; + phy-mode = "rmii"; + phy-reset-gpios = <&gpio7 6 0>; + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx53-smd.dts b/arch/arm/boot/dts/imx53-smd.dts index 06c68580c84..995554c324b 100644 --- a/arch/arm/boot/dts/imx53-smd.dts +++ b/arch/arm/boot/dts/imx53-smd.dts @@ -21,157 +21,6 @@ reg = <0x70000000 0x40000000>; }; - soc { - aips@50000000 { /* AIPS1 */ - spba@50000000 { - esdhc@50004000 { /* ESDHC1 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_esdhc1_1>; - cd-gpios = <&gpio3 13 0>; - wp-gpios = <&gpio4 11 0>; - status = "okay"; - }; - - esdhc@50008000 { /* ESDHC2 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_esdhc2_1>; - non-removable; - status = "okay"; - }; - - uart3: serial@5000c000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart3_1>; - fsl,uart-has-rtscts; - status = "okay"; - }; - - ecspi@50010000 { /* ECSPI1 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_ecspi1_1>; - fsl,spi-num-chipselects = <2>; - cs-gpios = <&gpio2 30 0>, <&gpio3 19 0>; - status = "okay"; - - zigbee: mc1323@0 { - compatible = "fsl,mc1323"; - spi-max-frequency = <8000000>; - reg = <0>; - }; - - flash: m25p32@1 { - #address-cells = <1>; - #size-cells = <1>; - compatible = "st,m25p32", "st,m25p"; - spi-max-frequency = <20000000>; - reg = <1>; - - partition@0 { - label = "U-Boot"; - reg = <0x0 0x40000>; - read-only; - }; - - partition@40000 { - label = "Kernel"; - reg = <0x40000 0x3c0000>; - }; - }; - }; - - esdhc@50020000 { /* ESDHC3 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_esdhc3_1>; - non-removable; - status = "okay"; - }; - }; - - iomuxc@53fa8000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_hog>; - - hog { - pinctrl_hog: hoggrp { - fsl,pins = < - 982 0x80000000 /* MX53_PAD_PATA_DATA14__GPIO2_14 */ - 989 0x80000000 /* MX53_PAD_PATA_DATA15__GPIO2_15 */ - 424 0x80000000 /* MX53_PAD_EIM_EB2__GPIO2_30 */ - 701 0x80000000 /* MX53_PAD_EIM_DA13__GPIO3_13 */ - 449 0x80000000 /* MX53_PAD_EIM_D19__GPIO3_19 */ - 43 0x80000000 /* MX53_PAD_KEY_ROW2__GPIO4_11 */ - 868 0x80000000 /* MX53_PAD_PATA_DA_0__GPIO7_6 */ - >; - }; - }; - }; - - uart1: serial@53fbc000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart1_1>; - status = "okay"; - }; - - uart2: serial@53fc0000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart2_1>; - status = "okay"; - }; - }; - - aips@60000000 { /* AIPS2 */ - i2c@63fc4000 { /* I2C2 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_i2c2_1>; - status = "okay"; - - codec: sgtl5000@0a { - compatible = "fsl,sgtl5000"; - reg = <0x0a>; - }; - - magnetometer: mag3110@0e { - compatible = "fsl,mag3110"; - reg = <0x0e>; - }; - - touchkey: mpr121@5a { - compatible = "fsl,mpr121"; - reg = <0x5a>; - }; - }; - - i2c@63fc8000 { /* I2C1 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_i2c1_1>; - status = "okay"; - - accelerometer: mma8450@1c { - compatible = "fsl,mma8450"; - reg = <0x1c>; - }; - - camera: ov5642@3c { - compatible = "ovti,ov5642"; - reg = <0x3c>; - }; - - pmic: dialog@48 { - compatible = "dialog,da9053", "dialog,da9052"; - reg = <0x48>; - }; - }; - - ethernet@63fec000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_fec_1>; - phy-mode = "rmii"; - phy-reset-gpios = <&gpio7 6 0>; - status = "okay"; - }; - }; - }; - gpio-keys { compatible = "gpio-keys"; @@ -188,3 +37,146 @@ }; }; }; + +&esdhc1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_esdhc1_1>; + cd-gpios = <&gpio3 13 0>; + wp-gpios = <&gpio4 11 0>; + status = "okay"; +}; + +&esdhc2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_esdhc2_1>; + non-removable; + status = "okay"; +}; + +&uart3 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart3_1>; + fsl,uart-has-rtscts; + status = "okay"; +}; + +&ecspi1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_ecspi1_1>; + fsl,spi-num-chipselects = <2>; + cs-gpios = <&gpio2 30 0>, <&gpio3 19 0>; + status = "okay"; + + zigbee: mc1323@0 { + compatible = "fsl,mc1323"; + spi-max-frequency = <8000000>; + reg = <0>; + }; + + flash: m25p32@1 { + #address-cells = <1>; + #size-cells = <1>; + compatible = "st,m25p32", "st,m25p"; + spi-max-frequency = <20000000>; + reg = <1>; + + partition@0 { + label = "U-Boot"; + reg = <0x0 0x40000>; + read-only; + }; + + partition@40000 { + label = "Kernel"; + reg = <0x40000 0x3c0000>; + }; + }; +}; + +&esdhc3 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_esdhc3_1>; + non-removable; + status = "okay"; +}; + +&iomuxc { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hog>; + + hog { + pinctrl_hog: hoggrp { + fsl,pins = < + 982 0x80000000 /* MX53_PAD_PATA_DATA14__GPIO2_14 */ + 989 0x80000000 /* MX53_PAD_PATA_DATA15__GPIO2_15 */ + 424 0x80000000 /* MX53_PAD_EIM_EB2__GPIO2_30 */ + 701 0x80000000 /* MX53_PAD_EIM_DA13__GPIO3_13 */ + 449 0x80000000 /* MX53_PAD_EIM_D19__GPIO3_19 */ + 43 0x80000000 /* MX53_PAD_KEY_ROW2__GPIO4_11 */ + 868 0x80000000 /* MX53_PAD_PATA_DA_0__GPIO7_6 */ + >; + }; + }; +}; + +&uart1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart1_1>; + status = "okay"; +}; + +&uart2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart2_1>; + status = "okay"; +}; + +&i2c2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c2_1>; + status = "okay"; + + codec: sgtl5000@0a { + compatible = "fsl,sgtl5000"; + reg = <0x0a>; + }; + + magnetometer: mag3110@0e { + compatible = "fsl,mag3110"; + reg = <0x0e>; + }; + + touchkey: mpr121@5a { + compatible = "fsl,mpr121"; + reg = <0x5a>; + }; +}; + +&i2c1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c1_1>; + status = "okay"; + + accelerometer: mma8450@1c { + compatible = "fsl,mma8450"; + reg = <0x1c>; + }; + + camera: ov5642@3c { + compatible = "ovti,ov5642"; + reg = <0x3c>; + }; + + pmic: dialog@48 { + compatible = "dialog,da9053", "dialog,da9052"; + reg = <0x48>; + }; +}; + +&fec { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_fec_1>; + phy-mode = "rmii"; + phy-reset-gpios = <&gpio7 6 0>; + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx53-tqma53.dtsi b/arch/arm/boot/dts/imx53-tqma53.dtsi new file mode 100644 index 00000000000..8278ec5ec22 --- /dev/null +++ b/arch/arm/boot/dts/imx53-tqma53.dtsi @@ -0,0 +1,172 @@ +/* + * Copyright 2012 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix + * Copyright 2012 Steffen Trumtrar <s.trumtrar@pengutronix.de>, Pengutronix + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +/include/ "imx53.dtsi" + +/ { + model = "TQ TQMa53"; + compatible = "tq,tqma53", "fsl,imx53"; + + memory { + reg = <0x70000000 0x40000000>; /* Up to 1GiB */ + }; + + regulators { + compatible = "simple-bus"; + + reg_3p3v: 3p3v { + compatible = "regulator-fixed"; + regulator-name = "3P3V"; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + }; +}; + +&esdhc2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_esdhc2_1>; + wp-gpios = <&gpio1 2 0>; + cd-gpios = <&gpio1 4 0>; + status = "disabled"; +}; + +&uart3 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart3_2>; + status = "disabled"; +}; + +&ecspi1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_ecspi1_1>; + fsl,spi-num-chipselects = <4>; + cs-gpios = <&gpio2 30 0>, <&gpio3 19 0>, + <&gpio3 24 0>, <&gpio3 25 0>; + status = "disabled"; +}; + +&esdhc3 { /* EMMC */ + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_esdhc3_1>; + vmmc-supply = <®_3p3v>; + non-removable; + bus-width = <8>; + status = "okay"; +}; + +&iomuxc { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hog>; + + i2s { + pinctrl_i2s_1: i2s-grp1 { + fsl,pins = < + 1 0x10000 /* I2S_MCLK */ + 10 0x10000 /* I2S_SCLK */ + 17 0x10000 /* I2S_DOUT */ + 23 0x10000 /* I2S_LRCLK*/ + 30 0x10000 /* I2S_DIN */ + >; + }; + }; + + hog { + pinctrl_hog: hoggrp { + fsl,pins = < + 610 0x10000 /* MX53_PAD_EIM_CS1__IPU_DI1_PIN6 (VSYNC)*/ + 711 0x10000 /* MX53_PAD_EIM_DA15__IPU_DI1_PIN4 (HSYNC)*/ + 873 0x10000 /* MX53_PAD_PATA_DA_1__GPIO7_7 (LCD_BLT_EN)*/ + 878 0x10000 /* MX53_PAD_PATA_DA_2__GPIO7_8 (LCD_RESET)*/ + 922 0x10000 /* MX53_PAD_PATA_DATA5__GPIO2_5 (LCD_POWER)*/ + 928 0x10000 /* MX53_PAD_PATA_DATA6__GPIO2_6 (PMIC_INT)*/ + 982 0x10000 /* MX53_PAD_PATA_DATA14__GPIO2_14 (CSI_RST)*/ + 989 0x10000 /* MX53_PAD_PATA_DATA15__GPIO2_15 (CSI_PWDN)*/ + 1069 0x10000 /* MX53_PAD_GPIO_0__GPIO1_0 (SYSTEM_DOWN)*/ + 1093 0x10000 /* MX53_PAD_GPIO_3__GPIO1_3 */ + >; + }; + }; +}; + +&uart1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart1_2>; + fsl,uart-has-rtscts; + status = "disabled"; +}; + +&uart2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart2_1>; + status = "disabled"; +}; + +&can1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_can1_2>; + status = "disabled"; +}; + +&can2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_can2_1>; + status = "disabled"; +}; + +&i2c3 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c3_1>; + status = "disabled"; +}; + +&cspi { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_cspi_1>; + fsl,spi-num-chipselects = <3>; + cs-gpios = <&gpio1 18 0>, <&gpio1 19 0>, + <&gpio1 21 0>; + status = "disabled"; +}; + +&i2c2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c2_1>; + status = "okay"; + + pmic: mc34708@8 { + compatible = "fsl,mc34708"; + reg = <0x8>; + fsl,mc13xxx-uses-rtc; + interrupt-parent = <&gpio2>; + interrupts = <6 8>; /* PDATA_DATA6, low active */ + }; + + sensor1: lm75@48 { + compatible = "lm75"; + reg = <0x48>; + }; + + eeprom: 24c64@50 { + compatible = "at,24c64"; + pagesize = <32>; + reg = <0x50>; + }; +}; + +&fec { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_fec_1>; + phy-mode = "rmii"; + status = "disabled"; +}; diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi index edc3f1eb669..d05aa215c7f 100644 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@ -274,6 +274,44 @@ }; }; + csi { + pinctrl_csi_1: csigrp-1 { + fsl,pins = < + 286 0x1d5 /* MX53_PAD_CSI0_DATA_EN__IPU_CSI0_DATA_EN */ + 291 0x1d5 /* MX53_PAD_CSI0_VSYNC__IPU_CSI0_VSYNC */ + 280 0x1d5 /* MX53_PAD_CSI0_MCLK__IPU_CSI0_HSYNC */ + 276 0x1d5 /* MX53_PAD_CSI0_PIXCLK__IPU_CSI0_PIXCLK */ + 409 0x1d5 /* MX53_PAD_CSI0_DAT19__IPU_CSI0_D_19 */ + 402 0x1d5 /* MX53_PAD_CSI0_DAT18__IPU_CSI0_D_18 */ + 395 0x1d5 /* MX53_PAD_CSI0_DAT17__IPU_CSI0_D_17 */ + 388 0x1d5 /* MX53_PAD_CSI0_DAT16__IPU_CSI0_D_16 */ + 381 0x1d5 /* MX53_PAD_CSI0_DAT15__IPU_CSI0_D_15 */ + 374 0x1d5 /* MX53_PAD_CSI0_DAT14__IPU_CSI0_D_14 */ + 367 0x1d5 /* MX53_PAD_CSI0_DAT13__IPU_CSI0_D_13 */ + 360 0x1d5 /* MX53_PAD_CSI0_DAT12__IPU_CSI0_D_12 */ + 352 0x1d5 /* MX53_PAD_CSI0_DAT11__IPU_CSI0_D_11 */ + 344 0x1d5 /* MX53_PAD_CSI0_DAT10__IPU_CSI0_D_10 */ + 336 0x1d5 /* MX53_PAD_CSI0_DAT9__IPU_CSI0_D_9 */ + 328 0x1d5 /* MX53_PAD_CSI0_DAT8__IPU_CSI0_D_8 */ + 320 0x1d5 /* MX53_PAD_CSI0_DAT7__IPU_CSI0_D_7 */ + 312 0x1d5 /* MX53_PAD_CSI0_DAT6__IPU_CSI0_D_6 */ + 304 0x1d5 /* MX53_PAD_CSI0_DAT5__IPU_CSI0_D_5 */ + 296 0x1d5 /* MX53_PAD_CSI0_DAT4__IPU_CSI0_D_4 */ + 276 0x1d5 /* MX53_PAD_CSI0_PIXCLK__IPU_CSI0_PIXCLK */ + >; + }; + }; + + cspi { + pinctrl_cspi_1: cspigrp-1 { + fsl,pins = < + 998 0x1d5 /* MX53_PAD_SD1_DATA0__CSPI_MISO */ + 1008 0x1d5 /* MX53_PAD_SD1_CMD__CSPI_MOSI */ + 1022 0x1d5 /* MX53_PAD_SD1_CLK__CSPI_SCLK */ + >; + }; + }; + ecspi1 { pinctrl_ecspi1_1: ecspi1grp-1 { fsl,pins = < @@ -349,6 +387,13 @@ 853 0x80000000 /* MX53_PAD_PATA_DIOR__CAN1_RXCAN */ >; }; + + pinctrl_can1_2: can1grp-2 { + fsl,pins = < + 37 0x80000000 /* MX53_PAD_KEY_COL2__CAN1_TXCAN */ + 44 0x80000000 /* MX53_PAD_KEY_ROW2__CAN1_RXCAN */ + >; + }; }; can2 { @@ -387,6 +432,14 @@ }; }; + owire { + pinctrl_owire_1: owiregrp-1 { + fsl,pins = < + 1166 0x80000000 /* MX53_PAD_GPIO_18__OWIRE_LINE */ + >; + }; + }; + uart1 { pinctrl_uart1_1: uart1grp-1 { fsl,pins = < @@ -421,6 +474,14 @@ 880 0x1c5 /* MX53_PAD_PATA_DA_2__UART3_RTS */ >; }; + + pinctrl_uart3_2: uart3grp-2 { + fsl,pins = < + 884 0x1c5 /* MX53_PAD_PATA_CS_0__UART3_TXD_MUX */ + 888 0x1c5 /* MX53_PAD_PATA_CS_1__UART3_RXD_MUX */ + >; + }; + }; uart4 { @@ -570,6 +631,13 @@ status = "disabled"; }; + owire: owire@63fa4000 { + compatible = "fsl,imx53-owire", "fsl,imx21-owire"; + reg = <0x63fa4000 0x4000>; + clocks = <&clks 159>; + status = "disabled"; + }; + ecspi2: ecspi@63fac000 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/imx6dl.dtsi b/arch/arm/boot/dts/imx6dl.dtsi new file mode 100644 index 00000000000..63fafe2a606 --- /dev/null +++ b/arch/arm/boot/dts/imx6dl.dtsi @@ -0,0 +1,59 @@ +/* + * Copyright 2013 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +/include/ "imx6qdl.dtsi" + +/ { + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + compatible = "arm,cortex-a9"; + reg = <0>; + next-level-cache = <&L2>; + }; + + cpu@1 { + compatible = "arm,cortex-a9"; + reg = <1>; + next-level-cache = <&L2>; + }; + }; + + soc { + aips1: aips-bus@02000000 { + pxp: pxp@020f0000 { + reg = <0x020f0000 0x4000>; + interrupts = <0 98 0x04>; + }; + + epdc: epdc@020f4000 { + reg = <0x020f4000 0x4000>; + interrupts = <0 97 0x04>; + }; + + lcdif: lcdif@020f8000 { + reg = <0x020f8000 0x4000>; + interrupts = <0 39 0x04>; + }; + }; + + aips2: aips-bus@02100000 { + i2c4: i2c@021f8000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,imx1-i2c"; + reg = <0x021f8000 0x4000>; + interrupts = <0 35 0x04>; + status = "disabled"; + }; + }; + }; +}; diff --git a/arch/arm/boot/dts/imx6q-arm2.dts b/arch/arm/boot/dts/imx6q-arm2.dts index 5bfa02a3f85..53eb241fa5a 100644 --- a/arch/arm/boot/dts/imx6q-arm2.dts +++ b/arch/arm/boot/dts/imx6q-arm2.dts @@ -21,71 +21,6 @@ reg = <0x10000000 0x80000000>; }; - soc { - gpmi-nand@00112000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_gpmi_nand_1>; - status = "disabled"; /* gpmi nand conflicts with SD */ - }; - - aips-bus@02000000 { /* AIPS1 */ - iomuxc@020e0000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_hog>; - - hog { - pinctrl_hog: hoggrp { - fsl,pins = < - 176 0x80000000 /* MX6Q_PAD_EIM_D25__GPIO_3_25 */ - >; - }; - }; - - arm2 { - pinctrl_usdhc3_arm2: usdhc3grp-arm2 { - fsl,pins = < - 1363 0x80000000 /* MX6Q_PAD_NANDF_CS0__GPIO_6_11 */ - 1369 0x80000000 /* MX6Q_PAD_NANDF_CS1__GPIO_6_14 */ - >; - }; - }; - }; - }; - - aips-bus@02100000 { /* AIPS2 */ - ethernet@02188000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_enet_2>; - phy-mode = "rgmii"; - status = "okay"; - }; - - usdhc@02198000 { /* uSDHC3 */ - cd-gpios = <&gpio6 11 0>; - wp-gpios = <&gpio6 14 0>; - vmmc-supply = <®_3p3v>; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usdhc3_1 - &pinctrl_usdhc3_arm2>; - status = "okay"; - }; - - usdhc@0219c000 { /* uSDHC4 */ - non-removable; - vmmc-supply = <®_3p3v>; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usdhc4_1>; - status = "okay"; - }; - - uart4: serial@021f0000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart4_1>; - status = "okay"; - }; - }; - }; - regulators { compatible = "simple-bus"; @@ -108,3 +43,62 @@ }; }; }; + +&gpmi { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_gpmi_nand_1>; + status = "disabled"; /* gpmi nand conflicts with SD */ +}; + +&iomuxc { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hog>; + + hog { + pinctrl_hog: hoggrp { + fsl,pins = < + 176 0x80000000 /* MX6Q_PAD_EIM_D25__GPIO_3_25 */ + >; + }; + }; + + arm2 { + pinctrl_usdhc3_arm2: usdhc3grp-arm2 { + fsl,pins = < + 1363 0x80000000 /* MX6Q_PAD_NANDF_CS0__GPIO_6_11 */ + 1369 0x80000000 /* MX6Q_PAD_NANDF_CS1__GPIO_6_14 */ + >; + }; + }; +}; + +&fec { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_enet_2>; + phy-mode = "rgmii"; + status = "okay"; +}; + +&usdhc3 { + cd-gpios = <&gpio6 11 0>; + wp-gpios = <&gpio6 14 0>; + vmmc-supply = <®_3p3v>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc3_1 + &pinctrl_usdhc3_arm2>; + status = "okay"; +}; + +&usdhc4 { + non-removable; + vmmc-supply = <®_3p3v>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc4_1>; + status = "okay"; +}; + +&uart4 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart4_1>; + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx6q-sabreauto.dts b/arch/arm/boot/dts/imx6q-sabreauto.dts index 826e4ad1477..656d489122f 100644 --- a/arch/arm/boot/dts/imx6q-sabreauto.dts +++ b/arch/arm/boot/dts/imx6q-sabreauto.dts @@ -20,45 +20,39 @@ memory { reg = <0x10000000 0x80000000>; }; +}; - soc { - aips-bus@02000000 { /* AIPS1 */ - iomuxc@020e0000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_hog>; +&iomuxc { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hog>; - hog { - pinctrl_hog: hoggrp { - fsl,pins = < - 1376 0x80000000 /* MX6Q_PAD_NANDF_CS2__GPIO_6_15 */ - 13 0x80000000 /* MX6Q_PAD_SD2_DAT2__GPIO_1_13 */ - >; - }; - }; - }; + hog { + pinctrl_hog: hoggrp { + fsl,pins = < + 1376 0x80000000 /* MX6Q_PAD_NANDF_CS2__GPIO_6_15 */ + 13 0x80000000 /* MX6Q_PAD_SD2_DAT2__GPIO_1_13 */ + >; }; + }; +}; - aips-bus@02100000 { /* AIPS2 */ - uart4: serial@021f0000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart4_1>; - status = "okay"; - }; +&uart4 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart4_1>; + status = "okay"; +}; - ethernet@02188000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_enet_2>; - phy-mode = "rgmii"; - status = "okay"; - }; +&fec { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_enet_2>; + phy-mode = "rgmii"; + status = "okay"; +}; - usdhc@02198000 { /* uSDHC3 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usdhc3_1>; - cd-gpios = <&gpio6 15 0>; - wp-gpios = <&gpio1 13 0>; - status = "okay"; - }; - }; - }; +&usdhc3 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc3_1>; + cd-gpios = <&gpio6 15 0>; + wp-gpios = <&gpio1 13 0>; + status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6q-sabrelite.dts b/arch/arm/boot/dts/imx6q-sabrelite.dts index d152328285a..2ce355cd05e 100644 --- a/arch/arm/boot/dts/imx6q-sabrelite.dts +++ b/arch/arm/boot/dts/imx6q-sabrelite.dts @@ -21,118 +21,6 @@ reg = <0x10000000 0x40000000>; }; - soc { - aips-bus@02000000 { /* AIPS1 */ - spba-bus@02000000 { - ecspi@02008000 { /* eCSPI1 */ - fsl,spi-num-chipselects = <1>; - cs-gpios = <&gpio3 19 0>; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_ecspi1_1>; - status = "okay"; - - flash: m25p80@0 { - compatible = "sst,sst25vf016b"; - spi-max-frequency = <20000000>; - reg = <0>; - }; - }; - - ssi1: ssi@02028000 { - fsl,mode = "i2s-slave"; - status = "okay"; - }; - }; - - iomuxc@020e0000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_hog>; - - hog { - pinctrl_hog: hoggrp { - fsl,pins = < - 1450 0x80000000 /* MX6Q_PAD_NANDF_D6__GPIO_2_6 */ - 1458 0x80000000 /* MX6Q_PAD_NANDF_D7__GPIO_2_7 */ - 121 0x80000000 /* MX6Q_PAD_EIM_D19__GPIO_3_19 */ - 144 0x80000000 /* MX6Q_PAD_EIM_D22__GPIO_3_22 */ - 152 0x80000000 /* MX6Q_PAD_EIM_D23__GPIO_3_23 */ - 1262 0x80000000 /* MX6Q_PAD_SD3_DAT5__GPIO_7_0 */ - 1270 0x1f0b0 /* MX6Q_PAD_SD3_DAT4__GPIO_7_1 */ - 953 0x80000000 /* MX6Q_PAD_GPIO_0__CCM_CLKO */ - >; - }; - }; - }; - }; - - aips-bus@02100000 { /* AIPS2 */ - usb@02184000 { /* USB OTG */ - vbus-supply = <®_usb_otg_vbus>; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usbotg_1>; - disable-over-current; - status = "okay"; - }; - - usb@02184200 { /* USB1 */ - status = "okay"; - }; - - ethernet@02188000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_enet_1>; - phy-mode = "rgmii"; - phy-reset-gpios = <&gpio3 23 0>; - status = "okay"; - }; - - usdhc@02198000 { /* uSDHC3 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usdhc3_2>; - cd-gpios = <&gpio7 0 0>; - wp-gpios = <&gpio7 1 0>; - vmmc-supply = <®_3p3v>; - status = "okay"; - }; - - usdhc@0219c000 { /* uSDHC4 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usdhc4_2>; - cd-gpios = <&gpio2 6 0>; - wp-gpios = <&gpio2 7 0>; - vmmc-supply = <®_3p3v>; - status = "okay"; - }; - - audmux@021d8000 { - status = "okay"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_audmux_1>; - }; - - uart2: serial@021e8000 { - status = "okay"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart2_1>; - }; - - i2c@021a0000 { /* I2C1 */ - status = "okay"; - clock-frequency = <100000>; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_i2c1_1>; - - codec: sgtl5000@0a { - compatible = "fsl,sgtl5000"; - reg = <0x0a>; - clocks = <&clks 169>; - VDDA-supply = <®_2p5v>; - VDDIO-supply = <®_3p3v>; - }; - }; - }; - }; - regulators { compatible = "simple-bus"; @@ -176,3 +64,107 @@ mux-ext-port = <4>; }; }; + +&ecspi1 { + fsl,spi-num-chipselects = <1>; + cs-gpios = <&gpio3 19 0>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_ecspi1_1>; + status = "okay"; + + flash: m25p80@0 { + compatible = "sst,sst25vf016b"; + spi-max-frequency = <20000000>; + reg = <0>; + }; +}; + +&ssi1 { + fsl,mode = "i2s-slave"; + status = "okay"; +}; + +&iomuxc { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hog>; + + hog { + pinctrl_hog: hoggrp { + fsl,pins = < + 1450 0x80000000 /* MX6Q_PAD_NANDF_D6__GPIO_2_6 */ + 1458 0x80000000 /* MX6Q_PAD_NANDF_D7__GPIO_2_7 */ + 121 0x80000000 /* MX6Q_PAD_EIM_D19__GPIO_3_19 */ + 144 0x80000000 /* MX6Q_PAD_EIM_D22__GPIO_3_22 */ + 152 0x80000000 /* MX6Q_PAD_EIM_D23__GPIO_3_23 */ + 1262 0x80000000 /* MX6Q_PAD_SD3_DAT5__GPIO_7_0 */ + 1270 0x1f0b0 /* MX6Q_PAD_SD3_DAT4__GPIO_7_1 */ + 953 0x80000000 /* MX6Q_PAD_GPIO_0__CCM_CLKO */ + >; + }; + }; +}; + +&usbotg { + vbus-supply = <®_usb_otg_vbus>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usbotg_1>; + disable-over-current; + status = "okay"; +}; + +&usbh1 { + status = "okay"; +}; + +&fec { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_enet_1>; + phy-mode = "rgmii"; + phy-reset-gpios = <&gpio3 23 0>; + status = "okay"; +}; + +&usdhc3 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc3_2>; + cd-gpios = <&gpio7 0 0>; + wp-gpios = <&gpio7 1 0>; + vmmc-supply = <®_3p3v>; + status = "okay"; +}; + +&usdhc4 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc4_2>; + cd-gpios = <&gpio2 6 0>; + wp-gpios = <&gpio2 7 0>; + vmmc-supply = <®_3p3v>; + status = "okay"; +}; + +&audmux { + status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_audmux_1>; +}; + +&uart2 { + status = "okay"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart2_1>; +}; + +&i2c1 { + status = "okay"; + clock-frequency = <100000>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_i2c1_1>; + + codec: sgtl5000@0a { + compatible = "fsl,sgtl5000"; + reg = <0x0a>; + clocks = <&clks 169>; + VDDA-supply = <®_2p5v>; + VDDIO-supply = <®_3p3v>; + }; +}; diff --git a/arch/arm/boot/dts/imx6q-sabresd.dts b/arch/arm/boot/dts/imx6q-sabresd.dts index a42402562b7..2dea304a798 100644 --- a/arch/arm/boot/dts/imx6q-sabresd.dts +++ b/arch/arm/boot/dts/imx6q-sabresd.dts @@ -21,61 +21,6 @@ reg = <0x10000000 0x40000000>; }; - soc { - aips-bus@02000000 { /* AIPS1 */ - spba-bus@02000000 { - uart1: serial@02020000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_uart1_1>; - status = "okay"; - }; - }; - - iomuxc@020e0000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_hog>; - - hog { - pinctrl_hog: hoggrp { - fsl,pins = < - 1004 0x80000000 /* MX6Q_PAD_GPIO_4__GPIO_1_4 */ - 1012 0x80000000 /* MX6Q_PAD_GPIO_5__GPIO_1_5 */ - 1402 0x80000000 /* MX6Q_PAD_NANDF_D0__GPIO_2_0 */ - 1410 0x80000000 /* MX6Q_PAD_NANDF_D1__GPIO_2_1 */ - 1418 0x80000000 /* MX6Q_PAD_NANDF_D2__GPIO_2_2 */ - 1426 0x80000000 /* MX6Q_PAD_NANDF_D3__GPIO_2_3 */ - >; - }; - }; - }; - }; - - aips-bus@02100000 { /* AIPS2 */ - ethernet@02188000 { - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_enet_1>; - phy-mode = "rgmii"; - status = "okay"; - }; - - usdhc@02194000 { /* uSDHC2 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usdhc2_1>; - cd-gpios = <&gpio2 2 0>; - wp-gpios = <&gpio2 3 0>; - status = "okay"; - }; - - usdhc@02198000 { /* uSDHC3 */ - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usdhc3_1>; - cd-gpios = <&gpio2 0 0>; - wp-gpios = <&gpio2 1 0>; - status = "okay"; - }; - }; - }; - gpio-keys { compatible = "gpio-keys"; @@ -92,3 +37,50 @@ }; }; }; + +&uart1 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_uart1_1>; + status = "okay"; +}; + +&iomuxc { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_hog>; + + hog { + pinctrl_hog: hoggrp { + fsl,pins = < + 1004 0x80000000 /* MX6Q_PAD_GPIO_4__GPIO_1_4 */ + 1012 0x80000000 /* MX6Q_PAD_GPIO_5__GPIO_1_5 */ + 1402 0x80000000 /* MX6Q_PAD_NANDF_D0__GPIO_2_0 */ + 1410 0x80000000 /* MX6Q_PAD_NANDF_D1__GPIO_2_1 */ + 1418 0x80000000 /* MX6Q_PAD_NANDF_D2__GPIO_2_2 */ + 1426 0x80000000 /* MX6Q_PAD_NANDF_D3__GPIO_2_3 */ + >; + }; + }; +}; + +&fec { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_enet_1>; + phy-mode = "rgmii"; + status = "okay"; +}; + +&usdhc2 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc2_1>; + cd-gpios = <&gpio2 2 0>; + wp-gpios = <&gpio2 3 0>; + status = "okay"; +}; + +&usdhc3 { + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usdhc3_1>; + cd-gpios = <&gpio2 0 0>; + wp-gpios = <&gpio2 1 0>; + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi index ff1205ea571..cba021eb035 100644 --- a/arch/arm/boot/dts/imx6q.dtsi +++ b/arch/arm/boot/dts/imx6q.dtsi @@ -1,33 +1,16 @@ + /* - * Copyright 2011 Freescale Semiconductor, Inc. - * Copyright 2011 Linaro Ltd. + * Copyright 2013 Freescale Semiconductor, Inc. * - * The code contained herein is licensed under the GNU General Public - * License. You may obtain a copy of the GNU General Public License - * Version 2 or later at the following locations: + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. * - * http://www.opensource.org/licenses/gpl-license.html - * http://www.gnu.org/copyleft/gpl.html */ -/include/ "skeleton.dtsi" +/include/ "imx6qdl.dtsi" / { - aliases { - serial0 = &uart1; - serial1 = &uart2; - serial2 = &uart3; - serial3 = &uart4; - serial4 = &uart5; - gpio0 = &gpio1; - gpio1 = &gpio2; - gpio2 = &gpio3; - gpio3 = &gpio4; - gpio4 = &gpio5; - gpio5 = &gpio6; - gpio6 = &gpio7; - }; - cpus { #address-cells = <1>; #size-cells = <0>; @@ -38,12 +21,19 @@ next-level-cache = <&L2>; operating-points = < /* kHz uV */ - 792000 1100000 + 1200000 1275000 + 996000 1250000 + 792000 1150000 396000 950000 - 198000 850000 >; clock-latency = <61036>; /* two CLK32 periods */ - cpu0-supply = <®_cpu>; + clocks = <&clks 104>, <&clks 6>, <&clks 16>, + <&clks 17>, <&clks 170>; + clock-names = "arm", "pll2_pfd2_396m", "step", + "pll1_sw", "pll1_sys"; + arm-supply = <®_arm>; + pu-supply = <®_pu>; + soc-supply = <®_soc>; }; cpu@1 { @@ -65,142 +55,9 @@ }; }; - intc: interrupt-controller@00a01000 { - compatible = "arm,cortex-a9-gic"; - #interrupt-cells = <3>; - #address-cells = <1>; - #size-cells = <1>; - interrupt-controller; - reg = <0x00a01000 0x1000>, - <0x00a00100 0x100>; - }; - - clocks { - #address-cells = <1>; - #size-cells = <0>; - - ckil { - compatible = "fsl,imx-ckil", "fixed-clock"; - clock-frequency = <32768>; - }; - - ckih1 { - compatible = "fsl,imx-ckih1", "fixed-clock"; - clock-frequency = <0>; - }; - - osc { - compatible = "fsl,imx-osc", "fixed-clock"; - clock-frequency = <24000000>; - }; - }; - soc { - #address-cells = <1>; - #size-cells = <1>; - compatible = "simple-bus"; - interrupt-parent = <&intc>; - ranges; - - dma-apbh@00110000 { - compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; - reg = <0x00110000 0x2000>; - clocks = <&clks 106>; - }; - - nfc: gpmi-nand@00112000 { - compatible = "fsl,imx6q-gpmi-nand"; - #address-cells = <1>; - #size-cells = <1>; - reg = <0x00112000 0x2000>, <0x00114000 0x2000>; - reg-names = "gpmi-nand", "bch"; - interrupts = <0 13 0x04>, <0 15 0x04>; - interrupt-names = "gpmi-dma", "bch"; - clocks = <&clks 152>, <&clks 153>, <&clks 151>, - <&clks 150>, <&clks 149>; - clock-names = "gpmi_io", "gpmi_apb", "gpmi_bch", - "gpmi_bch_apb", "per1_bch"; - fsl,gpmi-dma-channel = <0>; - status = "disabled"; - }; - - timer@00a00600 { - compatible = "arm,cortex-a9-twd-timer"; - reg = <0x00a00600 0x20>; - interrupts = <1 13 0xf01>; - }; - - L2: l2-cache@00a02000 { - compatible = "arm,pl310-cache"; - reg = <0x00a02000 0x1000>; - interrupts = <0 92 0x04>; - cache-unified; - cache-level = <2>; - }; - aips-bus@02000000 { /* AIPS1 */ - compatible = "fsl,aips-bus", "simple-bus"; - #address-cells = <1>; - #size-cells = <1>; - reg = <0x02000000 0x100000>; - ranges; - spba-bus@02000000 { - compatible = "fsl,spba-bus", "simple-bus"; - #address-cells = <1>; - #size-cells = <1>; - reg = <0x02000000 0x40000>; - ranges; - - spdif: spdif@02004000 { - reg = <0x02004000 0x4000>; - interrupts = <0 52 0x04>; - }; - - ecspi1: ecspi@02008000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi"; - reg = <0x02008000 0x4000>; - interrupts = <0 31 0x04>; - clocks = <&clks 112>, <&clks 112>; - clock-names = "ipg", "per"; - status = "disabled"; - }; - - ecspi2: ecspi@0200c000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi"; - reg = <0x0200c000 0x4000>; - interrupts = <0 32 0x04>; - clocks = <&clks 113>, <&clks 113>; - clock-names = "ipg", "per"; - status = "disabled"; - }; - - ecspi3: ecspi@02010000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi"; - reg = <0x02010000 0x4000>; - interrupts = <0 33 0x04>; - clocks = <&clks 114>, <&clks 114>; - clock-names = "ipg", "per"; - status = "disabled"; - }; - - ecspi4: ecspi@02014000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi"; - reg = <0x02014000 0x4000>; - interrupts = <0 34 0x04>; - clocks = <&clks 115>, <&clks 115>; - clock-names = "ipg", "per"; - status = "disabled"; - }; - ecspi5: ecspi@02018000 { #address-cells = <1>; #size-cells = <0>; @@ -211,361 +68,6 @@ clock-names = "ipg", "per"; status = "disabled"; }; - - uart1: serial@02020000 { - compatible = "fsl,imx6q-uart", "fsl,imx21-uart"; - reg = <0x02020000 0x4000>; - interrupts = <0 26 0x04>; - clocks = <&clks 160>, <&clks 161>; - clock-names = "ipg", "per"; - status = "disabled"; - }; - - esai: esai@02024000 { - reg = <0x02024000 0x4000>; - interrupts = <0 51 0x04>; - }; - - ssi1: ssi@02028000 { - compatible = "fsl,imx6q-ssi","fsl,imx21-ssi"; - reg = <0x02028000 0x4000>; - interrupts = <0 46 0x04>; - clocks = <&clks 178>; - fsl,fifo-depth = <15>; - fsl,ssi-dma-events = <38 37>; - status = "disabled"; - }; - - ssi2: ssi@0202c000 { - compatible = "fsl,imx6q-ssi","fsl,imx21-ssi"; - reg = <0x0202c000 0x4000>; - interrupts = <0 47 0x04>; - clocks = <&clks 179>; - fsl,fifo-depth = <15>; - fsl,ssi-dma-events = <42 41>; - status = "disabled"; - }; - - ssi3: ssi@02030000 { - compatible = "fsl,imx6q-ssi","fsl,imx21-ssi"; - reg = <0x02030000 0x4000>; - interrupts = <0 48 0x04>; - clocks = <&clks 180>; - fsl,fifo-depth = <15>; - fsl,ssi-dma-events = <46 45>; - status = "disabled"; - }; - - asrc: asrc@02034000 { - reg = <0x02034000 0x4000>; - interrupts = <0 50 0x04>; - }; - - spba@0203c000 { - reg = <0x0203c000 0x4000>; - }; - }; - - vpu: vpu@02040000 { - reg = <0x02040000 0x3c000>; - interrupts = <0 3 0x04 0 12 0x04>; - }; - - aipstz@0207c000 { /* AIPSTZ1 */ - reg = <0x0207c000 0x4000>; - }; - - pwm1: pwm@02080000 { - #pwm-cells = <2>; - compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm"; - reg = <0x02080000 0x4000>; - interrupts = <0 83 0x04>; - clocks = <&clks 62>, <&clks 145>; - clock-names = "ipg", "per"; - }; - - pwm2: pwm@02084000 { - #pwm-cells = <2>; - compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm"; - reg = <0x02084000 0x4000>; - interrupts = <0 84 0x04>; - clocks = <&clks 62>, <&clks 146>; - clock-names = "ipg", "per"; - }; - - pwm3: pwm@02088000 { - #pwm-cells = <2>; - compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm"; - reg = <0x02088000 0x4000>; - interrupts = <0 85 0x04>; - clocks = <&clks 62>, <&clks 147>; - clock-names = "ipg", "per"; - }; - - pwm4: pwm@0208c000 { - #pwm-cells = <2>; - compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm"; - reg = <0x0208c000 0x4000>; - interrupts = <0 86 0x04>; - clocks = <&clks 62>, <&clks 148>; - clock-names = "ipg", "per"; - }; - - can1: flexcan@02090000 { - reg = <0x02090000 0x4000>; - interrupts = <0 110 0x04>; - }; - - can2: flexcan@02094000 { - reg = <0x02094000 0x4000>; - interrupts = <0 111 0x04>; - }; - - gpt: gpt@02098000 { - compatible = "fsl,imx6q-gpt"; - reg = <0x02098000 0x4000>; - interrupts = <0 55 0x04>; - }; - - gpio1: gpio@0209c000 { - compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; - reg = <0x0209c000 0x4000>; - interrupts = <0 66 0x04 0 67 0x04>; - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - }; - - gpio2: gpio@020a0000 { - compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; - reg = <0x020a0000 0x4000>; - interrupts = <0 68 0x04 0 69 0x04>; - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - }; - - gpio3: gpio@020a4000 { - compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; - reg = <0x020a4000 0x4000>; - interrupts = <0 70 0x04 0 71 0x04>; - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - }; - - gpio4: gpio@020a8000 { - compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; - reg = <0x020a8000 0x4000>; - interrupts = <0 72 0x04 0 73 0x04>; - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - }; - - gpio5: gpio@020ac000 { - compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; - reg = <0x020ac000 0x4000>; - interrupts = <0 74 0x04 0 75 0x04>; - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - }; - - gpio6: gpio@020b0000 { - compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; - reg = <0x020b0000 0x4000>; - interrupts = <0 76 0x04 0 77 0x04>; - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - }; - - gpio7: gpio@020b4000 { - compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; - reg = <0x020b4000 0x4000>; - interrupts = <0 78 0x04 0 79 0x04>; - gpio-controller; - #gpio-cells = <2>; - interrupt-controller; - #interrupt-cells = <2>; - }; - - kpp: kpp@020b8000 { - reg = <0x020b8000 0x4000>; - interrupts = <0 82 0x04>; - }; - - wdog1: wdog@020bc000 { - compatible = "fsl,imx6q-wdt", "fsl,imx21-wdt"; - reg = <0x020bc000 0x4000>; - interrupts = <0 80 0x04>; - clocks = <&clks 0>; - }; - - wdog2: wdog@020c0000 { - compatible = "fsl,imx6q-wdt", "fsl,imx21-wdt"; - reg = <0x020c0000 0x4000>; - interrupts = <0 81 0x04>; - clocks = <&clks 0>; - status = "disabled"; - }; - - clks: ccm@020c4000 { - compatible = "fsl,imx6q-ccm"; - reg = <0x020c4000 0x4000>; - interrupts = <0 87 0x04 0 88 0x04>; - #clock-cells = <1>; - }; - - anatop: anatop@020c8000 { - compatible = "fsl,imx6q-anatop", "syscon", "simple-bus"; - reg = <0x020c8000 0x1000>; - interrupts = <0 49 0x04 0 54 0x04 0 127 0x04>; - - regulator-1p1@110 { - compatible = "fsl,anatop-regulator"; - regulator-name = "vdd1p1"; - regulator-min-microvolt = <800000>; - regulator-max-microvolt = <1375000>; - regulator-always-on; - anatop-reg-offset = <0x110>; - anatop-vol-bit-shift = <8>; - anatop-vol-bit-width = <5>; - anatop-min-bit-val = <4>; - anatop-min-voltage = <800000>; - anatop-max-voltage = <1375000>; - }; - - regulator-3p0@120 { - compatible = "fsl,anatop-regulator"; - regulator-name = "vdd3p0"; - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <3150000>; - regulator-always-on; - anatop-reg-offset = <0x120>; - anatop-vol-bit-shift = <8>; - anatop-vol-bit-width = <5>; - anatop-min-bit-val = <0>; - anatop-min-voltage = <2625000>; - anatop-max-voltage = <3400000>; - }; - - regulator-2p5@130 { - compatible = "fsl,anatop-regulator"; - regulator-name = "vdd2p5"; - regulator-min-microvolt = <2000000>; - regulator-max-microvolt = <2750000>; - regulator-always-on; - anatop-reg-offset = <0x130>; - anatop-vol-bit-shift = <8>; - anatop-vol-bit-width = <5>; - anatop-min-bit-val = <0>; - anatop-min-voltage = <2000000>; - anatop-max-voltage = <2750000>; - }; - - reg_cpu: regulator-vddcore@140 { - compatible = "fsl,anatop-regulator"; - regulator-name = "cpu"; - regulator-min-microvolt = <725000>; - regulator-max-microvolt = <1450000>; - regulator-always-on; - anatop-reg-offset = <0x140>; - anatop-vol-bit-shift = <0>; - anatop-vol-bit-width = <5>; - anatop-min-bit-val = <1>; - anatop-min-voltage = <725000>; - anatop-max-voltage = <1450000>; - }; - - regulator-vddpu@140 { - compatible = "fsl,anatop-regulator"; - regulator-name = "vddpu"; - regulator-min-microvolt = <725000>; - regulator-max-microvolt = <1450000>; - regulator-always-on; - anatop-reg-offset = <0x140>; - anatop-vol-bit-shift = <9>; - anatop-vol-bit-width = <5>; - anatop-min-bit-val = <1>; - anatop-min-voltage = <725000>; - anatop-max-voltage = <1450000>; - }; - - regulator-vddsoc@140 { - compatible = "fsl,anatop-regulator"; - regulator-name = "vddsoc"; - regulator-min-microvolt = <725000>; - regulator-max-microvolt = <1450000>; - regulator-always-on; - anatop-reg-offset = <0x140>; - anatop-vol-bit-shift = <18>; - anatop-vol-bit-width = <5>; - anatop-min-bit-val = <1>; - anatop-min-voltage = <725000>; - anatop-max-voltage = <1450000>; - }; - }; - - usbphy1: usbphy@020c9000 { - compatible = "fsl,imx6q-usbphy", "fsl,imx23-usbphy"; - reg = <0x020c9000 0x1000>; - interrupts = <0 44 0x04>; - clocks = <&clks 182>; - }; - - usbphy2: usbphy@020ca000 { - compatible = "fsl,imx6q-usbphy", "fsl,imx23-usbphy"; - reg = <0x020ca000 0x1000>; - interrupts = <0 45 0x04>; - clocks = <&clks 183>; - }; - - snvs@020cc000 { - compatible = "fsl,sec-v4.0-mon", "simple-bus"; - #address-cells = <1>; - #size-cells = <1>; - ranges = <0 0x020cc000 0x4000>; - - snvs-rtc-lp@34 { - compatible = "fsl,sec-v4.0-mon-rtc-lp"; - reg = <0x34 0x58>; - interrupts = <0 19 0x04 0 20 0x04>; - }; - }; - - epit1: epit@020d0000 { /* EPIT1 */ - reg = <0x020d0000 0x4000>; - interrupts = <0 56 0x04>; - }; - - epit2: epit@020d4000 { /* EPIT2 */ - reg = <0x020d4000 0x4000>; - interrupts = <0 57 0x04>; - }; - - src: src@020d8000 { - compatible = "fsl,imx6q-src"; - reg = <0x020d8000 0x4000>; - interrupts = <0 91 0x04 0 96 0x04>; - }; - - gpc: gpc@020dc000 { - compatible = "fsl,imx6q-gpc"; - reg = <0x020dc000 0x4000>; - interrupts = <0 89 0x04 0 90 0x04>; - }; - - gpr: iomuxc-gpr@020e0000 { - compatible = "fsl,imx6q-iomuxc-gpr", "syscon"; - reg = <0x020e0000 0x38>; }; iomuxc: iomuxc@020e0000 { @@ -780,272 +282,6 @@ }; }; }; - - dcic1: dcic@020e4000 { - reg = <0x020e4000 0x4000>; - interrupts = <0 124 0x04>; - }; - - dcic2: dcic@020e8000 { - reg = <0x020e8000 0x4000>; - interrupts = <0 125 0x04>; - }; - - sdma: sdma@020ec000 { - compatible = "fsl,imx6q-sdma", "fsl,imx35-sdma"; - reg = <0x020ec000 0x4000>; - interrupts = <0 2 0x04>; - clocks = <&clks 155>, <&clks 155>; - clock-names = "ipg", "ahb"; - fsl,sdma-ram-script-name = "imx/sdma/sdma-imx6q-to1.bin"; - }; - }; - - aips-bus@02100000 { /* AIPS2 */ - compatible = "fsl,aips-bus", "simple-bus"; - #address-cells = <1>; - #size-cells = <1>; - reg = <0x02100000 0x100000>; - ranges; - - caam@02100000 { - reg = <0x02100000 0x40000>; - interrupts = <0 105 0x04 0 106 0x04>; - }; - - aipstz@0217c000 { /* AIPSTZ2 */ - reg = <0x0217c000 0x4000>; - }; - - usbotg: usb@02184000 { - compatible = "fsl,imx6q-usb", "fsl,imx27-usb"; - reg = <0x02184000 0x200>; - interrupts = <0 43 0x04>; - clocks = <&clks 162>; - fsl,usbphy = <&usbphy1>; - fsl,usbmisc = <&usbmisc 0>; - status = "disabled"; - }; - - usbh1: usb@02184200 { - compatible = "fsl,imx6q-usb", "fsl,imx27-usb"; - reg = <0x02184200 0x200>; - interrupts = <0 40 0x04>; - clocks = <&clks 162>; - fsl,usbphy = <&usbphy2>; - fsl,usbmisc = <&usbmisc 1>; - status = "disabled"; - }; - - usbh2: usb@02184400 { - compatible = "fsl,imx6q-usb", "fsl,imx27-usb"; - reg = <0x02184400 0x200>; - interrupts = <0 41 0x04>; - clocks = <&clks 162>; - fsl,usbmisc = <&usbmisc 2>; - status = "disabled"; - }; - - usbh3: usb@02184600 { - compatible = "fsl,imx6q-usb", "fsl,imx27-usb"; - reg = <0x02184600 0x200>; - interrupts = <0 42 0x04>; - clocks = <&clks 162>; - fsl,usbmisc = <&usbmisc 3>; - status = "disabled"; - }; - - usbmisc: usbmisc: usbmisc@02184800 { - #index-cells = <1>; - compatible = "fsl,imx6q-usbmisc"; - reg = <0x02184800 0x200>; - clocks = <&clks 162>; - }; - - fec: ethernet@02188000 { - compatible = "fsl,imx6q-fec"; - reg = <0x02188000 0x4000>; - interrupts = <0 118 0x04 0 119 0x04>; - clocks = <&clks 117>, <&clks 117>, <&clks 190>; - clock-names = "ipg", "ahb", "ptp"; - status = "disabled"; - }; - - mlb@0218c000 { - reg = <0x0218c000 0x4000>; - interrupts = <0 53 0x04 0 117 0x04 0 126 0x04>; - }; - - usdhc1: usdhc@02190000 { - compatible = "fsl,imx6q-usdhc"; - reg = <0x02190000 0x4000>; - interrupts = <0 22 0x04>; - clocks = <&clks 163>, <&clks 163>, <&clks 163>; - clock-names = "ipg", "ahb", "per"; - bus-width = <4>; - status = "disabled"; - }; - - usdhc2: usdhc@02194000 { - compatible = "fsl,imx6q-usdhc"; - reg = <0x02194000 0x4000>; - interrupts = <0 23 0x04>; - clocks = <&clks 164>, <&clks 164>, <&clks 164>; - clock-names = "ipg", "ahb", "per"; - bus-width = <4>; - status = "disabled"; - }; - - usdhc3: usdhc@02198000 { - compatible = "fsl,imx6q-usdhc"; - reg = <0x02198000 0x4000>; - interrupts = <0 24 0x04>; - clocks = <&clks 165>, <&clks 165>, <&clks 165>; - clock-names = "ipg", "ahb", "per"; - bus-width = <4>; - status = "disabled"; - }; - - usdhc4: usdhc@0219c000 { - compatible = "fsl,imx6q-usdhc"; - reg = <0x0219c000 0x4000>; - interrupts = <0 25 0x04>; - clocks = <&clks 166>, <&clks 166>, <&clks 166>; - clock-names = "ipg", "ahb", "per"; - bus-width = <4>; - status = "disabled"; - }; - - i2c1: i2c@021a0000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,imx6q-i2c", "fsl,imx21-i2c"; - reg = <0x021a0000 0x4000>; - interrupts = <0 36 0x04>; - clocks = <&clks 125>; - status = "disabled"; - }; - - i2c2: i2c@021a4000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,imx6q-i2c", "fsl,imx21-i2c"; - reg = <0x021a4000 0x4000>; - interrupts = <0 37 0x04>; - clocks = <&clks 126>; - status = "disabled"; - }; - - i2c3: i2c@021a8000 { - #address-cells = <1>; - #size-cells = <0>; - compatible = "fsl,imx6q-i2c", "fsl,imx21-i2c"; - reg = <0x021a8000 0x4000>; - interrupts = <0 38 0x04>; - clocks = <&clks 127>; - status = "disabled"; - }; - - romcp@021ac000 { - reg = <0x021ac000 0x4000>; - }; - - mmdc0: mmdc@021b0000 { /* MMDC0 */ - compatible = "fsl,imx6q-mmdc"; - reg = <0x021b0000 0x4000>; - }; - - mmdc1: mmdc@021b4000 { /* MMDC1 */ - reg = <0x021b4000 0x4000>; - }; - - weim@021b8000 { - reg = <0x021b8000 0x4000>; - interrupts = <0 14 0x04>; - }; - - ocotp@021bc000 { - reg = <0x021bc000 0x4000>; - }; - - ocotp@021c0000 { - reg = <0x021c0000 0x4000>; - interrupts = <0 21 0x04>; - }; - - tzasc@021d0000 { /* TZASC1 */ - reg = <0x021d0000 0x4000>; - interrupts = <0 108 0x04>; - }; - - tzasc@021d4000 { /* TZASC2 */ - reg = <0x021d4000 0x4000>; - interrupts = <0 109 0x04>; - }; - - audmux: audmux@021d8000 { - compatible = "fsl,imx6q-audmux", "fsl,imx31-audmux"; - reg = <0x021d8000 0x4000>; - status = "disabled"; - }; - - mipi@021dc000 { /* MIPI-CSI */ - reg = <0x021dc000 0x4000>; - }; - - mipi@021e0000 { /* MIPI-DSI */ - reg = <0x021e0000 0x4000>; - }; - - vdoa@021e4000 { - reg = <0x021e4000 0x4000>; - interrupts = <0 18 0x04>; - }; - - uart2: serial@021e8000 { - compatible = "fsl,imx6q-uart", "fsl,imx21-uart"; - reg = <0x021e8000 0x4000>; - interrupts = <0 27 0x04>; - clocks = <&clks 160>, <&clks 161>; - clock-names = "ipg", "per"; - status = "disabled"; - }; - - uart3: serial@021ec000 { - compatible = "fsl,imx6q-uart", "fsl,imx21-uart"; - reg = <0x021ec000 0x4000>; - interrupts = <0 28 0x04>; - clocks = <&clks 160>, <&clks 161>; - clock-names = "ipg", "per"; - status = "disabled"; - }; - - uart4: serial@021f0000 { - compatible = "fsl,imx6q-uart", "fsl,imx21-uart"; - reg = <0x021f0000 0x4000>; - interrupts = <0 29 0x04>; - clocks = <&clks 160>, <&clks 161>; - clock-names = "ipg", "per"; - status = "disabled"; - }; - - uart5: serial@021f4000 { - compatible = "fsl,imx6q-uart", "fsl,imx21-uart"; - reg = <0x021f4000 0x4000>; - interrupts = <0 30 0x04>; - clocks = <&clks 160>, <&clks 161>; - clock-names = "ipg", "per"; - status = "disabled"; - }; - }; - - ipu1: ipu@02400000 { - #crtc-cells = <1>; - compatible = "fsl,imx6q-ipu"; - reg = <0x02400000 0x400000>; - interrupts = <0 6 0x4 0 5 0x4>; - clocks = <&clks 130>, <&clks 131>, <&clks 132>; - clock-names = "bus", "di0", "di1"; }; ipu2: ipu@02800000 { diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi new file mode 100644 index 00000000000..06ec460b458 --- /dev/null +++ b/arch/arm/boot/dts/imx6qdl.dtsi @@ -0,0 +1,800 @@ +/* + * Copyright 2011 Freescale Semiconductor, Inc. + * Copyright 2011 Linaro Ltd. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +/include/ "skeleton.dtsi" + +/ { + aliases { + serial0 = &uart1; + serial1 = &uart2; + serial2 = &uart3; + serial3 = &uart4; + serial4 = &uart5; + gpio0 = &gpio1; + gpio1 = &gpio2; + gpio2 = &gpio3; + gpio3 = &gpio4; + gpio4 = &gpio5; + gpio5 = &gpio6; + gpio6 = &gpio7; + }; + + intc: interrupt-controller@00a01000 { + compatible = "arm,cortex-a9-gic"; + #interrupt-cells = <3>; + #address-cells = <1>; + #size-cells = <1>; + interrupt-controller; + reg = <0x00a01000 0x1000>, + <0x00a00100 0x100>; + }; + + clocks { + #address-cells = <1>; + #size-cells = <0>; + + ckil { + compatible = "fsl,imx-ckil", "fixed-clock"; + clock-frequency = <32768>; + }; + + ckih1 { + compatible = "fsl,imx-ckih1", "fixed-clock"; + clock-frequency = <0>; + }; + + osc { + compatible = "fsl,imx-osc", "fixed-clock"; + clock-frequency = <24000000>; + }; + }; + + soc { + #address-cells = <1>; + #size-cells = <1>; + compatible = "simple-bus"; + interrupt-parent = <&intc>; + ranges; + + dma-apbh@00110000 { + compatible = "fsl,imx6q-dma-apbh", "fsl,imx28-dma-apbh"; + reg = <0x00110000 0x2000>; + clocks = <&clks 106>; + }; + + gpmi: gpmi-nand@00112000 { + compatible = "fsl,imx6q-gpmi-nand"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x00112000 0x2000>, <0x00114000 0x2000>; + reg-names = "gpmi-nand", "bch"; + interrupts = <0 13 0x04>, <0 15 0x04>; + interrupt-names = "gpmi-dma", "bch"; + clocks = <&clks 152>, <&clks 153>, <&clks 151>, + <&clks 150>, <&clks 149>; + clock-names = "gpmi_io", "gpmi_apb", "gpmi_bch", + "gpmi_bch_apb", "per1_bch"; + fsl,gpmi-dma-channel = <0>; + status = "disabled"; + }; + + timer@00a00600 { + compatible = "arm,cortex-a9-twd-timer"; + reg = <0x00a00600 0x20>; + interrupts = <1 13 0xf01>; + }; + + L2: l2-cache@00a02000 { + compatible = "arm,pl310-cache"; + reg = <0x00a02000 0x1000>; + interrupts = <0 92 0x04>; + cache-unified; + cache-level = <2>; + }; + + aips-bus@02000000 { /* AIPS1 */ + compatible = "fsl,aips-bus", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x02000000 0x100000>; + ranges; + + spba-bus@02000000 { + compatible = "fsl,spba-bus", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x02000000 0x40000>; + ranges; + + spdif: spdif@02004000 { + reg = <0x02004000 0x4000>; + interrupts = <0 52 0x04>; + }; + + ecspi1: ecspi@02008000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi"; + reg = <0x02008000 0x4000>; + interrupts = <0 31 0x04>; + clocks = <&clks 112>, <&clks 112>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + + ecspi2: ecspi@0200c000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi"; + reg = <0x0200c000 0x4000>; + interrupts = <0 32 0x04>; + clocks = <&clks 113>, <&clks 113>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + + ecspi3: ecspi@02010000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi"; + reg = <0x02010000 0x4000>; + interrupts = <0 33 0x04>; + clocks = <&clks 114>, <&clks 114>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + + ecspi4: ecspi@02014000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,imx6q-ecspi", "fsl,imx51-ecspi"; + reg = <0x02014000 0x4000>; + interrupts = <0 34 0x04>; + clocks = <&clks 115>, <&clks 115>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + + uart1: serial@02020000 { + compatible = "fsl,imx6q-uart", "fsl,imx21-uart"; + reg = <0x02020000 0x4000>; + interrupts = <0 26 0x04>; + clocks = <&clks 160>, <&clks 161>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + + esai: esai@02024000 { + reg = <0x02024000 0x4000>; + interrupts = <0 51 0x04>; + }; + + ssi1: ssi@02028000 { + compatible = "fsl,imx6q-ssi","fsl,imx21-ssi"; + reg = <0x02028000 0x4000>; + interrupts = <0 46 0x04>; + clocks = <&clks 178>; + fsl,fifo-depth = <15>; + fsl,ssi-dma-events = <38 37>; + status = "disabled"; + }; + + ssi2: ssi@0202c000 { + compatible = "fsl,imx6q-ssi","fsl,imx21-ssi"; + reg = <0x0202c000 0x4000>; + interrupts = <0 47 0x04>; + clocks = <&clks 179>; + fsl,fifo-depth = <15>; + fsl,ssi-dma-events = <42 41>; + status = "disabled"; + }; + + ssi3: ssi@02030000 { + compatible = "fsl,imx6q-ssi","fsl,imx21-ssi"; + reg = <0x02030000 0x4000>; + interrupts = <0 48 0x04>; + clocks = <&clks 180>; + fsl,fifo-depth = <15>; + fsl,ssi-dma-events = <46 45>; + status = "disabled"; + }; + + asrc: asrc@02034000 { + reg = <0x02034000 0x4000>; + interrupts = <0 50 0x04>; + }; + + spba@0203c000 { + reg = <0x0203c000 0x4000>; + }; + }; + + vpu: vpu@02040000 { + reg = <0x02040000 0x3c000>; + interrupts = <0 3 0x04 0 12 0x04>; + }; + + aipstz@0207c000 { /* AIPSTZ1 */ + reg = <0x0207c000 0x4000>; + }; + + pwm1: pwm@02080000 { + #pwm-cells = <2>; + compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm"; + reg = <0x02080000 0x4000>; + interrupts = <0 83 0x04>; + clocks = <&clks 62>, <&clks 145>; + clock-names = "ipg", "per"; + }; + + pwm2: pwm@02084000 { + #pwm-cells = <2>; + compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm"; + reg = <0x02084000 0x4000>; + interrupts = <0 84 0x04>; + clocks = <&clks 62>, <&clks 146>; + clock-names = "ipg", "per"; + }; + + pwm3: pwm@02088000 { + #pwm-cells = <2>; + compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm"; + reg = <0x02088000 0x4000>; + interrupts = <0 85 0x04>; + clocks = <&clks 62>, <&clks 147>; + clock-names = "ipg", "per"; + }; + + pwm4: pwm@0208c000 { + #pwm-cells = <2>; + compatible = "fsl,imx6q-pwm", "fsl,imx27-pwm"; + reg = <0x0208c000 0x4000>; + interrupts = <0 86 0x04>; + clocks = <&clks 62>, <&clks 148>; + clock-names = "ipg", "per"; + }; + + can1: flexcan@02090000 { + reg = <0x02090000 0x4000>; + interrupts = <0 110 0x04>; + }; + + can2: flexcan@02094000 { + reg = <0x02094000 0x4000>; + interrupts = <0 111 0x04>; + }; + + gpt: gpt@02098000 { + compatible = "fsl,imx6q-gpt"; + reg = <0x02098000 0x4000>; + interrupts = <0 55 0x04>; + }; + + gpio1: gpio@0209c000 { + compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; + reg = <0x0209c000 0x4000>; + interrupts = <0 66 0x04 0 67 0x04>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + }; + + gpio2: gpio@020a0000 { + compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; + reg = <0x020a0000 0x4000>; + interrupts = <0 68 0x04 0 69 0x04>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + }; + + gpio3: gpio@020a4000 { + compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; + reg = <0x020a4000 0x4000>; + interrupts = <0 70 0x04 0 71 0x04>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + }; + + gpio4: gpio@020a8000 { + compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; + reg = <0x020a8000 0x4000>; + interrupts = <0 72 0x04 0 73 0x04>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + }; + + gpio5: gpio@020ac000 { + compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; + reg = <0x020ac000 0x4000>; + interrupts = <0 74 0x04 0 75 0x04>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + }; + + gpio6: gpio@020b0000 { + compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; + reg = <0x020b0000 0x4000>; + interrupts = <0 76 0x04 0 77 0x04>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + }; + + gpio7: gpio@020b4000 { + compatible = "fsl,imx6q-gpio", "fsl,imx35-gpio"; + reg = <0x020b4000 0x4000>; + interrupts = <0 78 0x04 0 79 0x04>; + gpio-controller; + #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; + }; + + kpp: kpp@020b8000 { + reg = <0x020b8000 0x4000>; + interrupts = <0 82 0x04>; + }; + + wdog1: wdog@020bc000 { + compatible = "fsl,imx6q-wdt", "fsl,imx21-wdt"; + reg = <0x020bc000 0x4000>; + interrupts = <0 80 0x04>; + clocks = <&clks 0>; + }; + + wdog2: wdog@020c0000 { + compatible = "fsl,imx6q-wdt", "fsl,imx21-wdt"; + reg = <0x020c0000 0x4000>; + interrupts = <0 81 0x04>; + clocks = <&clks 0>; + status = "disabled"; + }; + + clks: ccm@020c4000 { + compatible = "fsl,imx6q-ccm"; + reg = <0x020c4000 0x4000>; + interrupts = <0 87 0x04 0 88 0x04>; + #clock-cells = <1>; + }; + + anatop: anatop@020c8000 { + compatible = "fsl,imx6q-anatop", "syscon", "simple-bus"; + reg = <0x020c8000 0x1000>; + interrupts = <0 49 0x04 0 54 0x04 0 127 0x04>; + + regulator-1p1@110 { + compatible = "fsl,anatop-regulator"; + regulator-name = "vdd1p1"; + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <1375000>; + regulator-always-on; + anatop-reg-offset = <0x110>; + anatop-vol-bit-shift = <8>; + anatop-vol-bit-width = <5>; + anatop-min-bit-val = <4>; + anatop-min-voltage = <800000>; + anatop-max-voltage = <1375000>; + }; + + regulator-3p0@120 { + compatible = "fsl,anatop-regulator"; + regulator-name = "vdd3p0"; + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <3150000>; + regulator-always-on; + anatop-reg-offset = <0x120>; + anatop-vol-bit-shift = <8>; + anatop-vol-bit-width = <5>; + anatop-min-bit-val = <0>; + anatop-min-voltage = <2625000>; + anatop-max-voltage = <3400000>; + }; + + regulator-2p5@130 { + compatible = "fsl,anatop-regulator"; + regulator-name = "vdd2p5"; + regulator-min-microvolt = <2000000>; + regulator-max-microvolt = <2750000>; + regulator-always-on; + anatop-reg-offset = <0x130>; + anatop-vol-bit-shift = <8>; + anatop-vol-bit-width = <5>; + anatop-min-bit-val = <0>; + anatop-min-voltage = <2000000>; + anatop-max-voltage = <2750000>; + }; + + reg_arm: regulator-vddcore@140 { + compatible = "fsl,anatop-regulator"; + regulator-name = "cpu"; + regulator-min-microvolt = <725000>; + regulator-max-microvolt = <1450000>; + regulator-always-on; + anatop-reg-offset = <0x140>; + anatop-vol-bit-shift = <0>; + anatop-vol-bit-width = <5>; + anatop-delay-reg-offset = <0x170>; + anatop-delay-bit-shift = <24>; + anatop-delay-bit-width = <2>; + anatop-min-bit-val = <1>; + anatop-min-voltage = <725000>; + anatop-max-voltage = <1450000>; + }; + + reg_pu: regulator-vddpu@140 { + compatible = "fsl,anatop-regulator"; + regulator-name = "vddpu"; + regulator-min-microvolt = <725000>; + regulator-max-microvolt = <1450000>; + regulator-always-on; + anatop-reg-offset = <0x140>; + anatop-vol-bit-shift = <9>; + anatop-vol-bit-width = <5>; + anatop-delay-reg-offset = <0x170>; + anatop-delay-bit-shift = <26>; + anatop-delay-bit-width = <2>; + anatop-min-bit-val = <1>; + anatop-min-voltage = <725000>; + anatop-max-voltage = <1450000>; + }; + + reg_soc: regulator-vddsoc@140 { + compatible = "fsl,anatop-regulator"; + regulator-name = "vddsoc"; + regulator-min-microvolt = <725000>; + regulator-max-microvolt = <1450000>; + regulator-always-on; + anatop-reg-offset = <0x140>; + anatop-vol-bit-shift = <18>; + anatop-vol-bit-width = <5>; + anatop-delay-reg-offset = <0x170>; + anatop-delay-bit-shift = <28>; + anatop-delay-bit-width = <2>; + anatop-min-bit-val = <1>; + anatop-min-voltage = <725000>; + anatop-max-voltage = <1450000>; + }; + }; + + usbphy1: usbphy@020c9000 { + compatible = "fsl,imx6q-usbphy", "fsl,imx23-usbphy"; + reg = <0x020c9000 0x1000>; + interrupts = <0 44 0x04>; + clocks = <&clks 182>; + }; + + usbphy2: usbphy@020ca000 { + compatible = "fsl,imx6q-usbphy", "fsl,imx23-usbphy"; + reg = <0x020ca000 0x1000>; + interrupts = <0 45 0x04>; + clocks = <&clks 183>; + }; + + snvs@020cc000 { + compatible = "fsl,sec-v4.0-mon", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0x020cc000 0x4000>; + + snvs-rtc-lp@34 { + compatible = "fsl,sec-v4.0-mon-rtc-lp"; + reg = <0x34 0x58>; + interrupts = <0 19 0x04 0 20 0x04>; + }; + }; + + epit1: epit@020d0000 { /* EPIT1 */ + reg = <0x020d0000 0x4000>; + interrupts = <0 56 0x04>; + }; + + epit2: epit@020d4000 { /* EPIT2 */ + reg = <0x020d4000 0x4000>; + interrupts = <0 57 0x04>; + }; + + src: src@020d8000 { + compatible = "fsl,imx6q-src"; + reg = <0x020d8000 0x4000>; + interrupts = <0 91 0x04 0 96 0x04>; + }; + + gpc: gpc@020dc000 { + compatible = "fsl,imx6q-gpc"; + reg = <0x020dc000 0x4000>; + interrupts = <0 89 0x04 0 90 0x04>; + }; + + gpr: iomuxc-gpr@020e0000 { + compatible = "fsl,imx6q-iomuxc-gpr", "syscon"; + reg = <0x020e0000 0x38>; + }; + + dcic1: dcic@020e4000 { + reg = <0x020e4000 0x4000>; + interrupts = <0 124 0x04>; + }; + + dcic2: dcic@020e8000 { + reg = <0x020e8000 0x4000>; + interrupts = <0 125 0x04>; + }; + + sdma: sdma@020ec000 { + compatible = "fsl,imx6q-sdma", "fsl,imx35-sdma"; + reg = <0x020ec000 0x4000>; + interrupts = <0 2 0x04>; + clocks = <&clks 155>, <&clks 155>; + clock-names = "ipg", "ahb"; + fsl,sdma-ram-script-name = "imx/sdma/sdma-imx6q.bin"; + }; + }; + + aips-bus@02100000 { /* AIPS2 */ + compatible = "fsl,aips-bus", "simple-bus"; + #address-cells = <1>; + #size-cells = <1>; + reg = <0x02100000 0x100000>; + ranges; + + caam@02100000 { + reg = <0x02100000 0x40000>; + interrupts = <0 105 0x04 0 106 0x04>; + }; + + aipstz@0217c000 { /* AIPSTZ2 */ + reg = <0x0217c000 0x4000>; + }; + + usbotg: usb@02184000 { + compatible = "fsl,imx6q-usb", "fsl,imx27-usb"; + reg = <0x02184000 0x200>; + interrupts = <0 43 0x04>; + clocks = <&clks 162>; + fsl,usbphy = <&usbphy1>; + fsl,usbmisc = <&usbmisc 0>; + status = "disabled"; + }; + + usbh1: usb@02184200 { + compatible = "fsl,imx6q-usb", "fsl,imx27-usb"; + reg = <0x02184200 0x200>; + interrupts = <0 40 0x04>; + clocks = <&clks 162>; + fsl,usbphy = <&usbphy2>; + fsl,usbmisc = <&usbmisc 1>; + status = "disabled"; + }; + + usbh2: usb@02184400 { + compatible = "fsl,imx6q-usb", "fsl,imx27-usb"; + reg = <0x02184400 0x200>; + interrupts = <0 41 0x04>; + clocks = <&clks 162>; + fsl,usbmisc = <&usbmisc 2>; + status = "disabled"; + }; + + usbh3: usb@02184600 { + compatible = "fsl,imx6q-usb", "fsl,imx27-usb"; + reg = <0x02184600 0x200>; + interrupts = <0 42 0x04>; + clocks = <&clks 162>; + fsl,usbmisc = <&usbmisc 3>; + status = "disabled"; + }; + + usbmisc: usbmisc: usbmisc@02184800 { + #index-cells = <1>; + compatible = "fsl,imx6q-usbmisc"; + reg = <0x02184800 0x200>; + clocks = <&clks 162>; + }; + + fec: ethernet@02188000 { + compatible = "fsl,imx6q-fec"; + reg = <0x02188000 0x4000>; + interrupts = <0 118 0x04 0 119 0x04>; + clocks = <&clks 117>, <&clks 117>, <&clks 190>; + clock-names = "ipg", "ahb", "ptp"; + status = "disabled"; + }; + + mlb@0218c000 { + reg = <0x0218c000 0x4000>; + interrupts = <0 53 0x04 0 117 0x04 0 126 0x04>; + }; + + usdhc1: usdhc@02190000 { + compatible = "fsl,imx6q-usdhc"; + reg = <0x02190000 0x4000>; + interrupts = <0 22 0x04>; + clocks = <&clks 163>, <&clks 163>, <&clks 163>; + clock-names = "ipg", "ahb", "per"; + bus-width = <4>; + status = "disabled"; + }; + + usdhc2: usdhc@02194000 { + compatible = "fsl,imx6q-usdhc"; + reg = <0x02194000 0x4000>; + interrupts = <0 23 0x04>; + clocks = <&clks 164>, <&clks 164>, <&clks 164>; + clock-names = "ipg", "ahb", "per"; + bus-width = <4>; + status = "disabled"; + }; + + usdhc3: usdhc@02198000 { + compatible = "fsl,imx6q-usdhc"; + reg = <0x02198000 0x4000>; + interrupts = <0 24 0x04>; + clocks = <&clks 165>, <&clks 165>, <&clks 165>; + clock-names = "ipg", "ahb", "per"; + bus-width = <4>; + status = "disabled"; + }; + + usdhc4: usdhc@0219c000 { + compatible = "fsl,imx6q-usdhc"; + reg = <0x0219c000 0x4000>; + interrupts = <0 25 0x04>; + clocks = <&clks 166>, <&clks 166>, <&clks 166>; + clock-names = "ipg", "ahb", "per"; + bus-width = <4>; + status = "disabled"; + }; + + i2c1: i2c@021a0000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,imx6q-i2c", "fsl,imx21-i2c"; + reg = <0x021a0000 0x4000>; + interrupts = <0 36 0x04>; + clocks = <&clks 125>; + status = "disabled"; + }; + + i2c2: i2c@021a4000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,imx6q-i2c", "fsl,imx21-i2c"; + reg = <0x021a4000 0x4000>; + interrupts = <0 37 0x04>; + clocks = <&clks 126>; + status = "disabled"; + }; + + i2c3: i2c@021a8000 { + #address-cells = <1>; + #size-cells = <0>; + compatible = "fsl,imx6q-i2c", "fsl,imx21-i2c"; + reg = <0x021a8000 0x4000>; + interrupts = <0 38 0x04>; + clocks = <&clks 127>; + status = "disabled"; + }; + + romcp@021ac000 { + reg = <0x021ac000 0x4000>; + }; + + mmdc0: mmdc@021b0000 { /* MMDC0 */ + compatible = "fsl,imx6q-mmdc"; + reg = <0x021b0000 0x4000>; + }; + + mmdc1: mmdc@021b4000 { /* MMDC1 */ + reg = <0x021b4000 0x4000>; + }; + + weim@021b8000 { + reg = <0x021b8000 0x4000>; + interrupts = <0 14 0x04>; + }; + + ocotp@021bc000 { + compatible = "fsl,imx6q-ocotp"; + reg = <0x021bc000 0x4000>; + }; + + ocotp@021c0000 { + reg = <0x021c0000 0x4000>; + interrupts = <0 21 0x04>; + }; + + tzasc@021d0000 { /* TZASC1 */ + reg = <0x021d0000 0x4000>; + interrupts = <0 108 0x04>; + }; + + tzasc@021d4000 { /* TZASC2 */ + reg = <0x021d4000 0x4000>; + interrupts = <0 109 0x04>; + }; + + audmux: audmux@021d8000 { + compatible = "fsl,imx6q-audmux", "fsl,imx31-audmux"; + reg = <0x021d8000 0x4000>; + status = "disabled"; + }; + + mipi@021dc000 { /* MIPI-CSI */ + reg = <0x021dc000 0x4000>; + }; + + mipi@021e0000 { /* MIPI-DSI */ + reg = <0x021e0000 0x4000>; + }; + + vdoa@021e4000 { + reg = <0x021e4000 0x4000>; + interrupts = <0 18 0x04>; + }; + + uart2: serial@021e8000 { + compatible = "fsl,imx6q-uart", "fsl,imx21-uart"; + reg = <0x021e8000 0x4000>; + interrupts = <0 27 0x04>; + clocks = <&clks 160>, <&clks 161>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + + uart3: serial@021ec000 { + compatible = "fsl,imx6q-uart", "fsl,imx21-uart"; + reg = <0x021ec000 0x4000>; + interrupts = <0 28 0x04>; + clocks = <&clks 160>, <&clks 161>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + + uart4: serial@021f0000 { + compatible = "fsl,imx6q-uart", "fsl,imx21-uart"; + reg = <0x021f0000 0x4000>; + interrupts = <0 29 0x04>; + clocks = <&clks 160>, <&clks 161>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + + uart5: serial@021f4000 { + compatible = "fsl,imx6q-uart", "fsl,imx21-uart"; + reg = <0x021f4000 0x4000>; + interrupts = <0 30 0x04>; + clocks = <&clks 160>, <&clks 161>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + }; + + ipu1: ipu@02400000 { + #crtc-cells = <1>; + compatible = "fsl,imx6q-ipu"; + reg = <0x02400000 0x400000>; + interrupts = <0 6 0x4 0 5 0x4>; + clocks = <&clks 130>, <&clks 131>, <&clks 132>; + clock-names = "bus", "di0", "di1"; + }; + }; +}; diff --git a/arch/arm/boot/dts/kirkwood-6282.dtsi b/arch/arm/boot/dts/kirkwood-6282.dtsi index 4ccea2130a6..192cf76fbf9 100644 --- a/arch/arm/boot/dts/kirkwood-6282.dtsi +++ b/arch/arm/boot/dts/kirkwood-6282.dtsi @@ -5,6 +5,12 @@ compatible = "marvell,88f6282-pinctrl"; reg = <0x10000 0x20>; + pmx_nand: pmx-nand { + marvell,pins = "mpp0", "mpp1", "mpp2", "mpp3", + "mpp4", "mpp5", "mpp18", "mpp19"; + marvell,function = "nand"; + }; + pmx_sata0: pmx-sata0 { marvell,pins = "mpp5", "mpp21", "mpp23"; marvell,function = "sata0"; @@ -21,6 +27,12 @@ marvell,pins = "mpp8", "mpp9"; marvell,function = "twsi0"; }; + + pmx_twsi1: pmx-twsi1 { + marvell,pins = "mpp36", "mpp37"; + marvell,function = "twsi1"; + }; + pmx_uart0: pmx-uart0 { marvell,pins = "mpp10", "mpp11"; marvell,function = "uart0"; @@ -30,6 +42,11 @@ marvell,pins = "mpp13", "mpp14"; marvell,function = "uart1"; }; + pmx_sdio: pmx-sdio { + marvell,pins = "mpp12", "mpp13", "mpp14", + "mpp15", "mpp16", "mpp17"; + marvell,function = "sdio"; + }; }; i2c@11100 { diff --git a/arch/arm/boot/dts/kirkwood-dreamplug.dts b/arch/arm/boot/dts/kirkwood-dreamplug.dts index f2d386c95b0..ef2d8c70570 100644 --- a/arch/arm/boot/dts/kirkwood-dreamplug.dts +++ b/arch/arm/boot/dts/kirkwood-dreamplug.dts @@ -74,6 +74,13 @@ status = "okay"; nr-ports = <1>; }; + + mvsdio@90000 { + pinctrl-0 = <&pmx_sdio>; + pinctrl-names = "default"; + status = "okay"; + /* No CD or WP GPIOs */ + }; }; gpio-leds { diff --git a/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts b/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts new file mode 100644 index 00000000000..9555a86297c --- /dev/null +++ b/arch/arm/boot/dts/kirkwood-guruplug-server-plus.dts @@ -0,0 +1,94 @@ +/dts-v1/; + +/include/ "kirkwood.dtsi" +/include/ "kirkwood-6281.dtsi" + +/ { + model = "Globalscale Technologies Guruplug Server Plus"; + compatible = "globalscale,guruplug-server-plus", "globalscale,guruplug", "marvell,kirkwood-88f6281", "marvell,kirkwood"; + + memory { + device_type = "memory"; + reg = <0x00000000 0x20000000>; + }; + + chosen { + bootargs = "console=ttyS0,115200n8 earlyprintk"; + }; + + ocp@f1000000 { + pinctrl: pinctrl@10000 { + + pinctrl-0 = < &pmx_led_health_r &pmx_led_health_g + &pmx_led_wmode_r &pmx_led_wmode_g >; + pinctrl-names = "default"; + + pmx_led_health_r: pmx-led-health-r { + marvell,pins = "mpp46"; + marvell,function = "gpio"; + }; + pmx_led_health_g: pmx-led-health-g { + marvell,pins = "mpp47"; + marvell,function = "gpio"; + }; + pmx_led_wmode_r: pmx-led-wmode-r { + marvell,pins = "mpp48"; + marvell,function = "gpio"; + }; + pmx_led_wmode_g: pmx-led-wmode-g { + marvell,pins = "mpp49"; + marvell,function = "gpio"; + }; + }; + serial@12000 { + clock-frequency = <200000000>; + status = "ok"; + }; + + nand@3000000 { + status = "okay"; + + partition@0 { + label = "u-boot"; + reg = <0x00000000 0x00100000>; + read-only; + }; + + partition@100000 { + label = "uImage"; + reg = <0x00100000 0x00400000>; + }; + + partition@500000 { + label = "data"; + reg = <0x00500000 0x1fb00000>; + }; + }; + + sata@80000 { + status = "okay"; + nr-ports = <1>; + }; + }; + + gpio-leds { + compatible = "gpio-leds"; + + health-r { + label = "guruplug:red:health"; + gpios = <&gpio1 14 1>; + }; + health-g { + label = "guruplug:green:health"; + gpios = <&gpio1 15 1>; + }; + wmode-r { + label = "guruplug:red:wmode"; + gpios = <&gpio1 16 1>; + }; + wmode-g { + label = "guruplug:green:wmode"; + gpios = <&gpio1 17 1>; + }; + }; +}; diff --git a/arch/arm/boot/dts/kirkwood-mplcec4.dts b/arch/arm/boot/dts/kirkwood-mplcec4.dts index 262c6540376..662dfd81b1c 100644 --- a/arch/arm/boot/dts/kirkwood-mplcec4.dts +++ b/arch/arm/boot/dts/kirkwood-mplcec4.dts @@ -20,12 +20,11 @@ pinctrl: pinctrl@10000 { pinctrl-0 = < &pmx_nand &pmx_uart0 - &pmx_led_health &pmx_sdio + &pmx_led_health &pmx_sata0 &pmx_sata1 &pmx_led_user1o &pmx_led_user1g &pmx_led_user0o &pmx_led_user0g &pmx_led_misc - &pmx_sdio_cd >; pinctrl-names = "default"; @@ -133,6 +132,14 @@ status = "okay"; }; + + mvsdio@90000 { + pinctrl-0 = <&pmx_sdio &pmx_sdio_cd>; + pinctrl-names = "default"; + status = "okay"; + cd-gpios = <&gpio1 15 0>; + /* No WP GPIO */ + }; }; gpio-leds { diff --git a/arch/arm/boot/dts/kirkwood-ns2-common.dtsi b/arch/arm/boot/dts/kirkwood-ns2-common.dtsi index 77d21abfcdf..e8e7ecef165 100644 --- a/arch/arm/boot/dts/kirkwood-ns2-common.dtsi +++ b/arch/arm/boot/dts/kirkwood-ns2-common.dtsi @@ -76,4 +76,10 @@ gpios = <&gpio0 12 0>; }; }; + + gpio_poweroff { + compatible = "gpio-poweroff"; + gpios = <&gpio0 31 0>; + }; + }; diff --git a/arch/arm/boot/dts/kirkwood-nsa310.dts b/arch/arm/boot/dts/kirkwood-nsa310.dts index 5509f965954..3a178cf708d 100644 --- a/arch/arm/boot/dts/kirkwood-nsa310.dts +++ b/arch/arm/boot/dts/kirkwood-nsa310.dts @@ -16,6 +16,105 @@ }; ocp@f1000000 { + pinctrl: pinctrl@10000 { + pinctrl-0 = < &pmx_led_esata_green + &pmx_led_esata_red + &pmx_led_usb_green + &pmx_led_usb_red + &pmx_usb_power_off + &pmx_led_sys_green + &pmx_led_sys_red + &pmx_btn_reset + &pmx_btn_copy + &pmx_led_copy_green + &pmx_led_copy_red + &pmx_led_hdd_green + &pmx_led_hdd_red + &pmx_unknown + &pmx_btn_power + &pmx_pwr_off >; + pinctrl-names = "default"; + + pmx_led_esata_green: pmx-led-esata-green { + marvell,pins = "mpp12"; + marvell,function = "gpio"; + }; + + pmx_led_esata_red: pmx-led-esata-red { + marvell,pins = "mpp13"; + marvell,function = "gpio"; + }; + + pmx_led_usb_green: pmx-led-usb-green { + marvell,pins = "mpp15"; + marvell,function = "gpio"; + }; + + pmx_led_usb_red: pmx-led-usb-red { + marvell,pins = "mpp16"; + marvell,function = "gpio"; + }; + + pmx_usb_power_off: pmx-usb-power-off { + marvell,pins = "mpp21"; + marvell,function = "gpio"; + }; + + pmx_led_sys_green: pmx-led-sys-green { + marvell,pins = "mpp28"; + marvell,function = "gpio"; + }; + + pmx_led_sys_red: pmx-led-sys-red { + marvell,pins = "mpp29"; + marvell,function = "gpio"; + }; + + pmx_btn_reset: pmx-btn-reset { + marvell,pins = "mpp36"; + marvell,function = "gpio"; + }; + + pmx_btn_copy: pmx-btn-copy { + marvell,pins = "mpp37"; + marvell,function = "gpio"; + }; + + pmx_led_copy_green: pmx-led-copy-green { + marvell,pins = "mpp39"; + marvell,function = "gpio"; + }; + + pmx_led_copy_red: pmx-led-copy-red { + marvell,pins = "mpp40"; + marvell,function = "gpio"; + }; + + pmx_led_hdd_green: pmx-led-hdd-green { + marvell,pins = "mpp41"; + marvell,function = "gpio"; + }; + + pmx_led_hdd_red: pmx-led-hdd-red { + marvell,pins = "mpp42"; + marvell,function = "gpio"; + }; + + pmx_unknown: pmx-unknown { + marvell,pins = "mpp44"; + marvell,function = "gpio"; + }; + + pmx_btn_power: pmx-btn-power { + marvell,pins = "mpp46"; + marvell,function = "gpio"; + }; + + pmx_pwr_off: pmx-pwr-off { + marvell,pins = "mpp48"; + marvell,function = "gpio"; + }; + }; serial@12000 { clock-frequency = <200000000>; @@ -29,6 +128,11 @@ i2c@11000 { status = "okay"; + + adt7476: adt7476a@2e { + compatible = "adt7476"; + reg = <0x2e>; + }; }; nand@3000000 { @@ -141,4 +245,26 @@ gpios = <&gpio1 8 0>; }; }; + + gpio_poweroff { + compatible = "gpio-poweroff"; + gpios = <&gpio1 16 0>; + }; + + regulators { + compatible = "simple-bus"; + #address-cells = <1>; + #size-cells = <0>; + + usb0_power_off: regulator@1 { + compatible = "regulator-fixed"; + reg = <1>; + regulator-name = "USB Power Off"; + regulator-min-microvolt = <5000000>; + regulator-max-microvolt = <5000000>; + regulator-always-on; + regulator-boot-on; + gpio = <&gpio0 21 0>; + }; + }; }; diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a6.dts b/arch/arm/boot/dts/kirkwood-openblocks_a6.dts index 49d3d74d4d3..ede7fe0d7a8 100644 --- a/arch/arm/boot/dts/kirkwood-openblocks_a6.dts +++ b/arch/arm/boot/dts/kirkwood-openblocks_a6.dts @@ -75,6 +75,122 @@ reg = <0x30>; }; }; + + pinctrl: pinctrl@10000 { + pinctrl-0 = < &pmx_nand &pmx_uart0 + &pmx_uart1 &pmx_twsi1 + &pmx_dip_sw0 &pmx_dip_sw1 + &pmx_dip_sw2 &pmx_dip_sw3 + &pmx_gpio_0 &pmx_gpio_1 + &pmx_gpio_2 &pmx_gpio_3 + &pmx_gpio_4 &pmx_gpio_5 + &pmx_gpio_6 &pmx_gpio_7 + &pmx_led_red &pmx_led_green + &pmx_led_yellow >; + pinctrl-names = "default"; + + pmx_uart0: pmx-uart0 { + marvell,pins = "mpp10", "mpp11", "mpp15", + "mpp16"; + marvell,function = "uart0"; + }; + + pmx_uart1: pmx-uart1 { + marvell,pins = "mpp13", "mpp14", "mpp8", + "mpp9"; + marvell,function = "uart1"; + }; + + pmx_sysrst: pmx-sysrst { + marvell,pins = "mpp6"; + marvell,function = "sysrst"; + }; + + pmx_dip_sw0: pmx-dip-sw0 { + marvell,pins = "mpp20"; + marvell,function = "gpio"; + }; + + pmx_dip_sw1: pmx-dip-sw1 { + marvell,pins = "mpp21"; + marvell,function = "gpio"; + }; + + pmx_dip_sw2: pmx-dip-sw2 { + marvell,pins = "mpp22"; + marvell,function = "gpio"; + }; + + pmx_dip_sw3: pmx-dip-sw3 { + marvell,pins = "mpp23"; + marvell,function = "gpio"; + }; + + pmx_gpio_0: pmx-gpio-0 { + marvell,pins = "mpp24"; + marvell,function = "gpio"; + }; + + pmx_gpio_1: pmx-gpio-1 { + marvell,pins = "mpp25"; + marvell,function = "gpio"; + }; + + pmx_gpio_2: pmx-gpio-2 { + marvell,pins = "mpp26"; + marvell,function = "gpio"; + }; + + pmx_gpio_3: pmx-gpio-3 { + marvell,pins = "mpp27"; + marvell,function = "gpio"; + }; + + pmx_gpio_4: pmx-gpio-4 { + marvell,pins = "mpp28"; + marvell,function = "gpio"; + }; + + pmx_gpio_5: pmx-gpio-5 { + marvell,pins = "mpp29"; + marvell,function = "gpio"; + }; + + pmx_gpio_6: pmx-gpio-6 { + marvell,pins = "mpp30"; + marvell,function = "gpio"; + }; + + pmx_gpio_7: pmx-gpio-7 { + marvell,pins = "mpp31"; + marvell,function = "gpio"; + }; + + pmx_gpio_init: pmx-init { + marvell,pins = "mpp38"; + marvell,function = "gpio"; + }; + + pmx_usb_oc: pmx-usb-oc { + marvell,pins = "mpp39"; + marvell,function = "gpio"; + }; + + pmx_led_red: pmx-led-red { + marvell,pins = "mpp41"; + marvell,function = "gpio"; + }; + + pmx_led_green: pmx-led-green { + marvell,pins = "mpp42"; + marvell,function = "gpio"; + }; + + pmx_led_yellow: pmx-led-yellow { + marvell,pins = "mpp43"; + marvell,function = "gpio"; + }; + }; }; gpio-leds { diff --git a/arch/arm/boot/dts/kirkwood-topkick.dts b/arch/arm/boot/dts/kirkwood-topkick.dts index cd15452a52a..842ff95d60d 100644 --- a/arch/arm/boot/dts/kirkwood-topkick.dts +++ b/arch/arm/boot/dts/kirkwood-topkick.dts @@ -1,6 +1,7 @@ /dts-v1/; /include/ "kirkwood.dtsi" +/include/ "kirkwood-6282.dtsi" / { model = "Univeral Scientific Industrial Co. Topkick-1281P2"; @@ -16,6 +17,96 @@ }; ocp@f1000000 { + pinctrl: pinctrl@10000 { + /* + * GPIO LED layout + * + * /-SYS_LED(2) + * | + * | /-DISK_LED + * | | + * | | /-WLAN_LED(2) + * | | | + * [SW] [*] [*] [*] + */ + + /* + * Switch positions + * + * /-SW_LEFT(2) + * | + * | /-SW_IDLE + * | | + * | | /-SW_RIGHT + * | | | + * PS [L] [I] [R] LEDS + */ + pinctrl-0 = < &pmx_led_disk_yellow + &pmx_sata0_pwr_enable + &pmx_led_sys_red + &pmx_led_sys_blue + &pmx_led_wifi_green + &pmx_sw_left + &pmx_sw_right + &pmx_sw_idle + &pmx_sw_left2 + &pmx_led_wifi_yellow + &pmx_uart0 + &pmx_nand + &pmx_twsi0 >; + pinctrl-names = "default"; + + pmx_led_disk_yellow: pmx-led-disk-yellow { + marvell,pins = "mpp21"; + marvell,function = "gpio"; + }; + + pmx_sata0_pwr_enable: pmx-sata0-pwr-enable { + marvell,pins = "mpp36"; + marvell,function = "gpio"; + }; + + pmx_led_sys_red: pmx-led-sys-red { + marvell,pins = "mpp37"; + marvell,function = "gpio"; + }; + + pmx_led_sys_blue: pmx-led-sys-blue { + marvell,pins = "mpp38"; + marvell,function = "gpio"; + }; + + pmx_led_wifi_green: pmx-led-wifi-green { + marvell,pins = "mpp39"; + marvell,function = "gpio"; + }; + + pmx_sw_left: pmx-sw-left { + marvell,pins = "mpp43"; + marvell,function = "gpio"; + }; + + pmx_sw_right: pmx-sw-right { + marvell,pins = "mpp44"; + marvell,function = "gpio"; + }; + + pmx_sw_idle: pmx-sw-idle { + marvell,pins = "mpp45"; + marvell,function = "gpio"; + }; + + pmx_sw_left2: pmx-sw-left2 { + marvell,pins = "mpp46"; + marvell,function = "gpio"; + }; + + pmx_led_wifi_yellow: pmx-led-wifi-yellow { + marvell,pins = "mpp48"; + marvell,function = "gpio"; + }; + }; + serial@12000 { clock-frequency = <200000000>; status = "ok"; @@ -54,6 +145,17 @@ status = "okay"; nr-ports = <1>; }; + + i2c@11000 { + status = "ok"; + }; + + mvsdio@90000 { + pinctrl-0 = <&pmx_sdio>; + pinctrl-names = "default"; + status = "okay"; + /* No CD or WP GPIOs */ + }; }; gpio-leds { diff --git a/arch/arm/boot/dts/kirkwood.dtsi b/arch/arm/boot/dts/kirkwood.dtsi index d6ab442b701..2c738d9dc82 100644 --- a/arch/arm/boot/dts/kirkwood.dtsi +++ b/arch/arm/boot/dts/kirkwood.dtsi @@ -193,5 +193,13 @@ clocks = <&gate_clk 17>; status = "okay"; }; + + mvsdio@90000 { + compatible = "marvell,orion-sdio"; + reg = <0x90000 0x200>; + interrupts = <28>; + clocks = <&gate_clk 4>; + status = "disabled"; + }; }; }; diff --git a/arch/arm/configs/dove_defconfig b/arch/arm/configs/dove_defconfig index 0b7ee92c571..3fe8dae8d32 100644 --- a/arch/arm/configs/dove_defconfig +++ b/arch/arm/configs/dove_defconfig @@ -1,26 +1,24 @@ CONFIG_EXPERIMENTAL=y CONFIG_SYSVIPC=y +CONFIG_NO_HZ=y +CONFIG_HIGH_RES_TIMERS=y CONFIG_LOG_BUF_SHIFT=14 CONFIG_EXPERT=y CONFIG_SLAB=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y CONFIG_ARCH_DOVE=y CONFIG_MACH_DOVE_DB=y CONFIG_MACH_CM_A510=y CONFIG_MACH_DOVE_DT=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y CONFIG_AEABI=y +CONFIG_HIGHMEM=y CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_HIGHMEM=y -CONFIG_USE_OF=y -CONFIG_ATAGS=y CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y -CONFIG_ARM_ATAG_DTB_COMPAT_CMDLINE_FROM_BOOTLOADER=y CONFIG_VFP=y CONFIG_NET=y CONFIG_PACKET=y @@ -32,8 +30,9 @@ CONFIG_IP_PNP_DHCP=y CONFIG_IP_PNP_BOOTP=y # CONFIG_IPV6 is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y CONFIG_MTD=y -CONFIG_MTD_PARTITIONS=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_CHAR=y CONFIG_MTD_BLOCK=y @@ -57,7 +56,6 @@ CONFIG_ATA=y CONFIG_SATA_MV=y CONFIG_NETDEVICES=y CONFIG_MV643XX_ETH=y -# CONFIG_NETDEV_10000 is not set CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV is not set CONFIG_INPUT_EVDEV=y @@ -68,10 +66,7 @@ CONFIG_LEGACY_PTY_COUNT=16 # CONFIG_DEVKMEM is not set CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_SERIAL_8250_PCI is not set CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set CONFIG_I2C=y @@ -81,13 +76,11 @@ CONFIG_SPI=y CONFIG_SPI_ORION=y # CONFIG_HWMON is not set CONFIG_USB=y -CONFIG_USB_DEVICEFS=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_ROOT_HUB_TT=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_MMC_SDHCI=y -CONFIG_MMC_SDHCI_IO_ACCESSORS=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_DOVE=y CONFIG_NEW_LEDS=y @@ -104,6 +97,7 @@ CONFIG_MV_XOR=y CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y # CONFIG_EXT3_FS_XATTR is not set +CONFIG_EXT4_FS=y CONFIG_ISO9660_FS=y CONFIG_JOLIET=y CONFIG_UDF_FS=m @@ -112,24 +106,20 @@ CONFIG_VFAT_FS=y CONFIG_TMPFS=y CONFIG_JFFS2_FS=y CONFIG_NFS_FS=y -CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y -CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_850=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_ISO8859_2=y CONFIG_NLS_UTF8=y +CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_KERNEL=y # CONFIG_SCHED_DEBUG is not set CONFIG_TIMER_STATS=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_DEBUG_INFO=y -CONFIG_SYSCTL_SYSCALL_CHECK=y CONFIG_DEBUG_USER=y -CONFIG_DEBUG_ERRORS=y CONFIG_CRYPTO_NULL=y CONFIG_CRYPTO_ECB=m CONFIG_CRYPTO_PCBC=m @@ -138,7 +128,6 @@ CONFIG_CRYPTO_MD4=y CONFIG_CRYPTO_SHA1=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y -CONFIG_CRYPTO_AES=y CONFIG_CRYPTO_BLOWFISH=y CONFIG_CRYPTO_TEA=y CONFIG_CRYPTO_TWOFISH=y @@ -147,5 +136,4 @@ CONFIG_CRYPTO_LZO=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRYPTO_DEV_MV_CESA=y CONFIG_CRC_CCITT=y -CONFIG_CRC16=y CONFIG_LIBCRC32C=y diff --git a/arch/arm/configs/mvebu_defconfig b/arch/arm/configs/mvebu_defconfig index cbd91bce1ca..2ec8119cff7 100644 --- a/arch/arm/configs/mvebu_defconfig +++ b/arch/arm/configs/mvebu_defconfig @@ -14,16 +14,20 @@ CONFIG_MACH_ARMADA_XP=y # CONFIG_CACHE_L2X0 is not set # CONFIG_SWP_EMULATE is not set CONFIG_SMP=y -# CONFIG_LOCAL_TIMERS is not set CONFIG_AEABI=y CONFIG_HIGHMEM=y # CONFIG_COMPACTION is not set CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 CONFIG_ARM_APPENDED_DTB=y +CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_VFP=y CONFIG_NET=y CONFIG_INET=y +CONFIG_BT=y +CONFIG_BT_MRVL=y +CONFIG_BT_MRVL_SDIO=y +CONFIG_CFG80211=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_SD=y CONFIG_ATA=y @@ -31,16 +35,34 @@ CONFIG_SATA_MV=y CONFIG_NETDEVICES=y CONFIG_MVNETA=y CONFIG_MARVELL_PHY=y +CONFIG_MWIFIEX=y +CONFIG_MWIFIEX_SDIO=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_I2C=y +CONFIG_SPI=y +CONFIG_SPI_ORION=y CONFIG_I2C_MV64XXX=y +CONFIG_MTD=y +CONFIG_MTD_CHAR=y +CONFIG_MTD_M25P80=y CONFIG_SERIAL_8250_DW=y CONFIG_GPIOLIB=y CONFIG_GPIO_SYSFS=y -# CONFIG_USB_SUPPORT is not set +CONFIG_USB_SUPPORT=y +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_EHCI_ROOT_HUB_TT=y +CONFIG_MMC=y +CONFIG_MMC_MVSDIO=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=m +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_S35390A=y +CONFIG_RTC_DRV_MV=y CONFIG_DMADEVICES=y CONFIG_MV_XOR=y # CONFIG_IOMMU_SUPPORT is not set diff --git a/arch/arm/mach-dove/Kconfig b/arch/arm/mach-dove/Kconfig index 603c5fd99e8..36469d81395 100644 --- a/arch/arm/mach-dove/Kconfig +++ b/arch/arm/mach-dove/Kconfig @@ -2,8 +2,12 @@ if ARCH_DOVE menu "Marvell Dove Implementations" +config DOVE_LEGACY + bool + config MACH_DOVE_DB bool "Marvell DB-MV88AP510 Development Board" + select DOVE_LEGACY select I2C_BOARDINFO help Say 'Y' here if you want your kernel to support the @@ -11,6 +15,7 @@ config MACH_DOVE_DB config MACH_CM_A510 bool "CompuLab CM-A510 Board" + select DOVE_LEGACY help Say 'Y' here if you want your kernel to support the CompuLab CM-A510 Board. @@ -19,6 +24,8 @@ config MACH_DOVE_DT bool "Marvell Dove Flattened Device Tree" select MVEBU_CLK_CORE select MVEBU_CLK_GATING + select REGULATOR + select REGULATOR_FIXED_VOLTAGE select USE_OF help Say 'Y' here if you want your kernel to support the diff --git a/arch/arm/mach-dove/Makefile b/arch/arm/mach-dove/Makefile index 5e683baf96c..3f0a858fb59 100644 --- a/arch/arm/mach-dove/Makefile +++ b/arch/arm/mach-dove/Makefile @@ -1,4 +1,6 @@ -obj-y += common.o addr-map.o irq.o mpp.o +obj-y += common.o addr-map.o irq.o +obj-$(CONFIG_DOVE_LEGACY) += mpp.o obj-$(CONFIG_PCI) += pcie.o obj-$(CONFIG_MACH_DOVE_DB) += dove-db-setup.o +obj-$(CONFIG_MACH_DOVE_DT) += board-dt.o obj-$(CONFIG_MACH_CM_A510) += cm-a510.o diff --git a/arch/arm/mach-dove/board-dt.c b/arch/arm/mach-dove/board-dt.c new file mode 100644 index 00000000000..fbde1dd6711 --- /dev/null +++ b/arch/arm/mach-dove/board-dt.c @@ -0,0 +1,92 @@ +/* + * arch/arm/mach-dove/board-dt.c + * + * Marvell Dove 88AP510 System On Chip FDT Board + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/init.h> +#include <linux/clk-provider.h> +#include <linux/clk/mvebu.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_data/usb-ehci-orion.h> +#include <asm/hardware/cache-tauros2.h> +#include <asm/mach/arch.h> +#include <mach/pm.h> +#include <plat/common.h> +#include <plat/irq.h> +#include "common.h" + +/* + * There are still devices that doesn't even know about DT, + * get clock gates here and add a clock lookup. + */ +static void __init dove_legacy_clk_init(void) +{ + struct device_node *np = of_find_compatible_node(NULL, NULL, + "marvell,dove-gating-clock"); + struct of_phandle_args clkspec; + + clkspec.np = np; + clkspec.args_count = 1; + + clkspec.args[0] = CLOCK_GATING_BIT_GBE; + orion_clkdev_add(NULL, "mv643xx_eth_port.0", + of_clk_get_from_provider(&clkspec)); + + clkspec.args[0] = CLOCK_GATING_BIT_PCIE0; + orion_clkdev_add("0", "pcie", + of_clk_get_from_provider(&clkspec)); + + clkspec.args[0] = CLOCK_GATING_BIT_PCIE1; + orion_clkdev_add("1", "pcie", + of_clk_get_from_provider(&clkspec)); +} + +static void __init dove_of_clk_init(void) +{ + mvebu_clocks_init(); + dove_legacy_clk_init(); +} + +static struct mv643xx_eth_platform_data dove_dt_ge00_data = { + .phy_addr = MV643XX_ETH_PHY_ADDR_DEFAULT, +}; + +static void __init dove_dt_init(void) +{ + pr_info("Dove 88AP510 SoC\n"); + +#ifdef CONFIG_CACHE_TAUROS2 + tauros2_init(0); +#endif + dove_setup_cpu_mbus(); + + /* Setup root of clk tree */ + dove_of_clk_init(); + + /* Internal devices not ported to DT yet */ + dove_ge00_init(&dove_dt_ge00_data); + dove_pcie_init(1, 1); + + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); +} + +static const char * const dove_dt_board_compat[] = { + "marvell,dove", + NULL +}; + +DT_MACHINE_START(DOVE_DT, "Marvell Dove (Flattened Device Tree)") + .map_io = dove_map_io, + .init_early = dove_init_early, + .init_irq = orion_dt_init_irq, + .init_time = dove_timer_init, + .init_machine = dove_dt_init, + .restart = dove_restart, + .dt_compat = dove_dt_board_compat, +MACHINE_END diff --git a/arch/arm/mach-dove/common.c b/arch/arm/mach-dove/common.c index ea84c535a11..c6b3b2bb50e 100644 --- a/arch/arm/mach-dove/common.c +++ b/arch/arm/mach-dove/common.c @@ -360,88 +360,3 @@ void dove_restart(char mode, const char *cmd) while (1) ; } - -#if defined(CONFIG_MACH_DOVE_DT) -/* - * There are still devices that doesn't even know about DT, - * get clock gates here and add a clock lookup. - */ -static void __init dove_legacy_clk_init(void) -{ - struct device_node *np = of_find_compatible_node(NULL, NULL, - "marvell,dove-gating-clock"); - struct of_phandle_args clkspec; - - clkspec.np = np; - clkspec.args_count = 1; - - clkspec.args[0] = CLOCK_GATING_BIT_USB0; - orion_clkdev_add(NULL, "orion-ehci.0", - of_clk_get_from_provider(&clkspec)); - - clkspec.args[0] = CLOCK_GATING_BIT_USB1; - orion_clkdev_add(NULL, "orion-ehci.1", - of_clk_get_from_provider(&clkspec)); - - clkspec.args[0] = CLOCK_GATING_BIT_GBE; - orion_clkdev_add(NULL, "mv643xx_eth_port.0", - of_clk_get_from_provider(&clkspec)); - - clkspec.args[0] = CLOCK_GATING_BIT_PCIE0; - orion_clkdev_add("0", "pcie", - of_clk_get_from_provider(&clkspec)); - - clkspec.args[0] = CLOCK_GATING_BIT_PCIE1; - orion_clkdev_add("1", "pcie", - of_clk_get_from_provider(&clkspec)); -} - -static void __init dove_of_clk_init(void) -{ - mvebu_clocks_init(); - dove_legacy_clk_init(); -} - -static struct mv643xx_eth_platform_data dove_dt_ge00_data = { - .phy_addr = MV643XX_ETH_PHY_ADDR_DEFAULT, -}; - -static void __init dove_dt_init(void) -{ - pr_info("Dove 88AP510 SoC, TCLK = %d MHz.\n", - (dove_tclk + 499999) / 1000000); - -#ifdef CONFIG_CACHE_TAUROS2 - tauros2_init(0); -#endif - dove_setup_cpu_mbus(); - - /* Setup root of clk tree */ - dove_of_clk_init(); - - /* Internal devices not ported to DT yet */ - dove_rtc_init(); - - dove_ge00_init(&dove_dt_ge00_data); - dove_ehci0_init(); - dove_ehci1_init(); - dove_pcie_init(1, 1); - - of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); -} - -static const char * const dove_dt_board_compat[] = { - "marvell,dove", - NULL -}; - -DT_MACHINE_START(DOVE_DT, "Marvell Dove (Flattened Device Tree)") - .map_io = dove_map_io, - .init_early = dove_init_early, - .init_irq = orion_dt_init_irq, - .init_time = dove_timer_init, - .init_machine = dove_dt_init, - .restart = dove_restart, - .dt_compat = dove_dt_board_compat, -MACHINE_END -#endif diff --git a/arch/arm/mach-imx/clk-imx51-imx53.c b/arch/arm/mach-imx/clk-imx51-imx53.c index fb7cb841b64..0f39f8c93b9 100644 --- a/arch/arm/mach-imx/clk-imx51-imx53.c +++ b/arch/arm/mach-imx/clk-imx51-imx53.c @@ -83,6 +83,7 @@ enum imx5_clks { ssi2_root_gate, ssi3_root_gate, ssi_ext1_gate, ssi_ext2_gate, epit1_ipg_gate, epit1_hf_gate, epit2_ipg_gate, epit2_hf_gate, can_sel, can1_serial_gate, can1_ipg_gate, + owire_gate, clk_max }; @@ -233,12 +234,13 @@ static void __init mx5_clocks_common_init(unsigned long rate_ckil, clk[epit1_hf_gate] = imx_clk_gate2("epit1_hf_gate", "per_root", MXC_CCM_CCGR2, 4); clk[epit2_ipg_gate] = imx_clk_gate2("epit2_ipg_gate", "ipg", MXC_CCM_CCGR2, 6); clk[epit2_hf_gate] = imx_clk_gate2("epit2_hf_gate", "per_root", MXC_CCM_CCGR2, 8); + clk[owire_gate] = imx_clk_gate2("owire_gate", "per_root", MXC_CCM_CCGR2, 22); for (i = 0; i < ARRAY_SIZE(clk); i++) if (IS_ERR(clk[i])) pr_err("i.MX5 clk %d: register failed with %ld\n", i, PTR_ERR(clk[i])); - + clk_register_clkdev(clk[gpt_hf_gate], "per", "imx-gpt.0"); clk_register_clkdev(clk[gpt_ipg_gate], "ipg", "imx-gpt.0"); clk_register_clkdev(clk[uart1_per_gate], "per", "imx21-uart.0"); diff --git a/arch/arm/mach-imx/clk-imx6q.c b/arch/arm/mach-imx/clk-imx6q.c index 540138c4606..7b025ee528a 100644 --- a/arch/arm/mach-imx/clk-imx6q.c +++ b/arch/arm/mach-imx/clk-imx6q.c @@ -164,8 +164,8 @@ enum mx6q_clks { usdhc4, vdo_axi, vpu_axi, cko1, pll1_sys, pll2_bus, pll3_usb_otg, pll4_audio, pll5_video, pll8_mlb, pll7_usb_host, pll6_enet, ssi1_ipg, ssi2_ipg, ssi3_ipg, rom, usbphy1, usbphy2, ldb_di0_div_3_5, ldb_di1_div_3_5, - sata_ref, sata_ref_100m, pcie_ref, pcie_ref_125m, enet_ref, - clk_max + sata_ref, sata_ref_100m, pcie_ref, pcie_ref_125m, enet_ref, usbphy1_gate, + usbphy2_gate, clk_max }; static struct clk *clk[clk_max]; @@ -218,8 +218,21 @@ int __init mx6q_clocks_init(void) clk[pll7_usb_host] = imx_clk_pllv3(IMX_PLLV3_USB, "pll7_usb_host","osc", base + 0x20, 0x3); clk[pll8_mlb] = imx_clk_pllv3(IMX_PLLV3_MLB, "pll8_mlb", "osc", base + 0xd0, 0x0); - clk[usbphy1] = imx_clk_gate("usbphy1", "pll3_usb_otg", base + 0x10, 6); - clk[usbphy2] = imx_clk_gate("usbphy2", "pll7_usb_host", base + 0x20, 6); + /* + * Bit 20 is the reserved and read-only bit, we do this only for: + * - Do nothing for usbphy clk_enable/disable + * - Keep refcount when do usbphy clk_enable/disable, in that case, + * the clk framework may need to enable/disable usbphy's parent + */ + clk[usbphy1] = imx_clk_gate("usbphy1", "pll3_usb_otg", base + 0x10, 20); + clk[usbphy2] = imx_clk_gate("usbphy2", "pll7_usb_host", base + 0x20, 20); + + /* + * usbphy*_gate needs to be on after system boots up, and software + * never needs to control it anymore. + */ + clk[usbphy1_gate] = imx_clk_gate("usbphy1_gate", "dummy", base + 0x10, 6); + clk[usbphy2_gate] = imx_clk_gate("usbphy2_gate", "dummy", base + 0x20, 6); clk[sata_ref] = imx_clk_fixed_factor("sata_ref", "pll6_enet", 1, 5); clk[pcie_ref] = imx_clk_fixed_factor("pcie_ref", "pll6_enet", 1, 4); @@ -446,6 +459,11 @@ int __init mx6q_clocks_init(void) for (i = 0; i < ARRAY_SIZE(clks_init_on); i++) clk_prepare_enable(clk[clks_init_on[i]]); + if (IS_ENABLED(CONFIG_USB_MXS_PHY)) { + clk_prepare_enable(clk[usbphy1_gate]); + clk_prepare_enable(clk[usbphy2_gate]); + } + /* Set initial power mode */ imx6q_set_lpm(WAIT_CLOCKED); diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 1786b2d1257..9ffd103b27e 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -12,6 +12,7 @@ #include <linux/clk.h> #include <linux/clkdev.h> +#include <linux/cpu.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/init.h> @@ -22,6 +23,7 @@ #include <linux/of_address.h> #include <linux/of_irq.h> #include <linux/of_platform.h> +#include <linux/opp.h> #include <linux/phy.h> #include <linux/regmap.h> #include <linux/micrel_phy.h> @@ -200,6 +202,64 @@ static void __init imx6q_init_machine(void) imx6q_1588_init(); } +#define OCOTP_CFG3 0x440 +#define OCOTP_CFG3_SPEED_SHIFT 16 +#define OCOTP_CFG3_SPEED_1P2GHZ 0x3 + +static void __init imx6q_opp_check_1p2ghz(struct device *cpu_dev) +{ + struct device_node *np; + void __iomem *base; + u32 val; + + np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-ocotp"); + if (!np) { + pr_warn("failed to find ocotp node\n"); + return; + } + + base = of_iomap(np, 0); + if (!base) { + pr_warn("failed to map ocotp\n"); + goto put_node; + } + + val = readl_relaxed(base + OCOTP_CFG3); + val >>= OCOTP_CFG3_SPEED_SHIFT; + if ((val & 0x3) != OCOTP_CFG3_SPEED_1P2GHZ) + if (opp_disable(cpu_dev, 1200000000)) + pr_warn("failed to disable 1.2 GHz OPP\n"); + +put_node: + of_node_put(np); +} + +static void __init imx6q_opp_init(struct device *cpu_dev) +{ + struct device_node *np; + + np = of_find_node_by_path("/cpus/cpu@0"); + if (!np) { + pr_warn("failed to find cpu0 node\n"); + return; + } + + cpu_dev->of_node = np; + if (of_init_opp_table(cpu_dev)) { + pr_warn("failed to init OPP table\n"); + goto put_node; + } + + imx6q_opp_check_1p2ghz(cpu_dev); + +put_node: + of_node_put(np); +} + +struct platform_device imx6q_cpufreq_pdev = { + .name = "imx6q-cpufreq", +}; + static void __init imx6q_init_late(void) { /* @@ -208,6 +268,11 @@ static void __init imx6q_init_late(void) */ if (imx6q_revision() > IMX_CHIP_REVISION_1_1) imx6q_cpuidle_init(); + + if (IS_ENABLED(CONFIG_ARM_IMX6Q_CPUFREQ)) { + imx6q_opp_init(&imx6q_cpufreq_pdev.dev); + platform_device_register(&imx6q_cpufreq_pdev); + } } static void __init imx6q_map_io(void) diff --git a/arch/arm/mach-kirkwood/Kconfig b/arch/arm/mach-kirkwood/Kconfig index f91cdff5a3e..7b6a64bc5f4 100644 --- a/arch/arm/mach-kirkwood/Kconfig +++ b/arch/arm/mach-kirkwood/Kconfig @@ -58,6 +58,13 @@ config ARCH_KIRKWOOD_DT Say 'Y' here if you want your kernel to support the Marvell Kirkwood using flattened device tree. +config MACH_GURUPLUG_DT + bool "Marvell GuruPlug Reference Board (Flattened Device Tree)" + select ARCH_KIRKWOOD_DT + help + Say 'Y' here if you want your kernel to support the + Marvell GuruPlug Reference Board (Flattened Device Tree). + config MACH_DREAMPLUG_DT bool "Marvell DreamPlug (Flattened Device Tree)" select ARCH_KIRKWOOD_DT diff --git a/arch/arm/mach-kirkwood/Makefile b/arch/arm/mach-kirkwood/Makefile index d6653095a1e..4cc4bee4d0c 100644 --- a/arch/arm/mach-kirkwood/Makefile +++ b/arch/arm/mach-kirkwood/Makefile @@ -21,6 +21,7 @@ obj-$(CONFIG_MACH_T5325) += t5325-setup.o obj-$(CONFIG_ARCH_KIRKWOOD_DT) += board-dt.o obj-$(CONFIG_MACH_DREAMPLUG_DT) += board-dreamplug.o +obj-$(CONFIG_MACH_GURUPLUG_DT) += board-guruplug.o obj-$(CONFIG_MACH_ICONNECT_DT) += board-iconnect.o obj-$(CONFIG_MACH_DLINK_KIRKWOOD_DT) += board-dnskw.o obj-$(CONFIG_MACH_IB62X0_DT) += board-ib62x0.o diff --git a/arch/arm/mach-kirkwood/board-dreamplug.c b/arch/arm/mach-kirkwood/board-dreamplug.c index 08248e24ffc..0903242c00d 100644 --- a/arch/arm/mach-kirkwood/board-dreamplug.c +++ b/arch/arm/mach-kirkwood/board-dreamplug.c @@ -15,7 +15,6 @@ #include <linux/init.h> #include <linux/mv643xx_eth.h> #include <linux/gpio.h> -#include <linux/platform_data/mmc-mvsdio.h> #include "common.h" static struct mv643xx_eth_platform_data dreamplug_ge00_data = { @@ -26,10 +25,6 @@ static struct mv643xx_eth_platform_data dreamplug_ge01_data = { .phy_addr = MV643XX_ETH_PHY_ADDR(1), }; -static struct mvsdio_platform_data dreamplug_mvsdio_data = { - /* unfortunately the CD signal has not been connected */ -}; - void __init dreamplug_init(void) { /* @@ -37,5 +32,4 @@ void __init dreamplug_init(void) */ kirkwood_ge00_init(&dreamplug_ge00_data); kirkwood_ge01_init(&dreamplug_ge01_data); - kirkwood_sdio_init(&dreamplug_mvsdio_data); } diff --git a/arch/arm/mach-kirkwood/board-dt.c b/arch/arm/mach-kirkwood/board-dt.c index 95cc04d14b6..2e73e9d53f7 100644 --- a/arch/arm/mach-kirkwood/board-dt.c +++ b/arch/arm/mach-kirkwood/board-dt.c @@ -55,10 +55,6 @@ static void __init kirkwood_legacy_clk_init(void) orion_clkdev_add("0", "pcie", of_clk_get_from_provider(&clkspec)); - clkspec.args[0] = CGC_BIT_USB0; - orion_clkdev_add(NULL, "orion-ehci.0", - of_clk_get_from_provider(&clkspec)); - clkspec.args[0] = CGC_BIT_PEX1; orion_clkdev_add("1", "pcie", of_clk_get_from_provider(&clkspec)); @@ -66,11 +62,6 @@ static void __init kirkwood_legacy_clk_init(void) clkspec.args[0] = CGC_BIT_GE1; orion_clkdev_add(NULL, "mv643xx_eth_port.1", of_clk_get_from_provider(&clkspec)); - - clkspec.args[0] = CGC_BIT_SDIO; - orion_clkdev_add(NULL, "mvsdio", - of_clk_get_from_provider(&clkspec)); - } static void __init kirkwood_of_clk_init(void) @@ -107,6 +98,9 @@ static void __init kirkwood_dt_init(void) if (of_machine_is_compatible("globalscale,dreamplug")) dreamplug_init(); + if (of_machine_is_compatible("globalscale,guruplug")) + guruplug_dt_init(); + if (of_machine_is_compatible("dlink,dns-kirkwood")) dnskw_init(); @@ -150,14 +144,12 @@ static void __init kirkwood_dt_init(void) if (of_machine_is_compatible("usi,topkick")) usi_topkick_init(); - if (of_machine_is_compatible("zyxel,nsa310")) - nsa310_init(); - of_platform_populate(NULL, kirkwood_dt_match_table, NULL, NULL); } static const char * const kirkwood_dt_board_compat[] = { "globalscale,dreamplug", + "globalscale,guruplug", "dlink,dns-320", "dlink,dns-325", "iom,iconnect", diff --git a/arch/arm/mach-kirkwood/board-guruplug.c b/arch/arm/mach-kirkwood/board-guruplug.c new file mode 100644 index 00000000000..0a0df4554d8 --- /dev/null +++ b/arch/arm/mach-kirkwood/board-guruplug.c @@ -0,0 +1,39 @@ +/* + * arch/arm/mach-kirkwood/board-guruplug.c + * + * Marvell Guruplug Reference Board Init for drivers not converted to + * flattened device tree yet. + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/mv643xx_eth.h> +#include <linux/gpio.h> +#include <linux/platform_data/mmc-mvsdio.h> +#include "common.h" + +static struct mv643xx_eth_platform_data guruplug_ge00_data = { + .phy_addr = MV643XX_ETH_PHY_ADDR(0), +}; + +static struct mv643xx_eth_platform_data guruplug_ge01_data = { + .phy_addr = MV643XX_ETH_PHY_ADDR(1), +}; + +static struct mvsdio_platform_data guruplug_mvsdio_data = { + /* unfortunately the CD signal has not been connected */ +}; + +void __init guruplug_dt_init(void) +{ + /* + * Basic setup. Needs to be called early. + */ + kirkwood_ge00_init(&guruplug_ge00_data); + kirkwood_ge01_init(&guruplug_ge01_data); + kirkwood_sdio_init(&guruplug_mvsdio_data); +} diff --git a/arch/arm/mach-kirkwood/board-mplcec4.c b/arch/arm/mach-kirkwood/board-mplcec4.c index 3264925b831..7d6dc669e17 100644 --- a/arch/arm/mach-kirkwood/board-mplcec4.c +++ b/arch/arm/mach-kirkwood/board-mplcec4.c @@ -12,7 +12,6 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/mv643xx_eth.h> -#include <linux/platform_data/mmc-mvsdio.h> #include "common.h" static struct mv643xx_eth_platform_data mplcec4_ge00_data = { @@ -23,11 +22,6 @@ static struct mv643xx_eth_platform_data mplcec4_ge01_data = { .phy_addr = MV643XX_ETH_PHY_ADDR(2), }; -static struct mvsdio_platform_data mplcec4_mvsdio_data = { - .gpio_card_detect = 47, /* MPP47 used as SD card detect */ -}; - - void __init mplcec4_init(void) { /* @@ -35,7 +29,6 @@ void __init mplcec4_init(void) */ kirkwood_ge00_init(&mplcec4_ge00_data); kirkwood_ge01_init(&mplcec4_ge01_data); - kirkwood_sdio_init(&mplcec4_mvsdio_data); kirkwood_pcie_init(KW_PCIE0); } diff --git a/arch/arm/mach-kirkwood/board-ns2.c b/arch/arm/mach-kirkwood/board-ns2.c index f4632a809f6..f2ea3b7ad72 100644 --- a/arch/arm/mach-kirkwood/board-ns2.c +++ b/arch/arm/mach-kirkwood/board-ns2.c @@ -15,7 +15,6 @@ #include <linux/init.h> #include <linux/platform_device.h> #include <linux/mv643xx_eth.h> -#include <linux/gpio.h> #include <linux/of.h> #include "common.h" @@ -23,13 +22,6 @@ static struct mv643xx_eth_platform_data ns2_ge00_data = { .phy_addr = MV643XX_ETH_PHY_ADDR(8), }; -#define NS2_GPIO_POWER_OFF 31 - -static void ns2_power_off(void) -{ - gpio_set_value(NS2_GPIO_POWER_OFF, 1); -} - void __init ns2_init(void) { /* @@ -39,10 +31,4 @@ void __init ns2_init(void) of_machine_is_compatible("lacie,netspace_mini_v2")) ns2_ge00_data.phy_addr = MV643XX_ETH_PHY_ADDR(0); kirkwood_ge00_init(&ns2_ge00_data); - - if (gpio_request(NS2_GPIO_POWER_OFF, "power-off") == 0 && - gpio_direction_output(NS2_GPIO_POWER_OFF, 0) == 0) - pm_power_off = ns2_power_off; - else - pr_err("ns2: failed to configure power-off GPIO\n"); } diff --git a/arch/arm/mach-kirkwood/board-nsa310.c b/arch/arm/mach-kirkwood/board-nsa310.c index 970174ad4a7..55ade93b93b 100644 --- a/arch/arm/mach-kirkwood/board-nsa310.c +++ b/arch/arm/mach-kirkwood/board-nsa310.c @@ -10,79 +10,9 @@ #include <linux/kernel.h> #include <linux/init.h> -#include <linux/gpio.h> -#include <linux/i2c.h> #include <mach/kirkwood.h> #include <linux/of.h> #include "common.h" -#include "mpp.h" - -#define NSA310_GPIO_USB_POWER_OFF 21 -#define NSA310_GPIO_POWER_OFF 48 - -static unsigned int nsa310_mpp_config[] __initdata = { - MPP12_GPIO, /* led esata green */ - MPP13_GPIO, /* led esata red */ - MPP15_GPIO, /* led usb green */ - MPP16_GPIO, /* led usb red */ - MPP21_GPIO, /* control usb power off */ - MPP28_GPIO, /* led sys green */ - MPP29_GPIO, /* led sys red */ - MPP36_GPIO, /* key reset */ - MPP37_GPIO, /* key copy */ - MPP39_GPIO, /* led copy green */ - MPP40_GPIO, /* led copy red */ - MPP41_GPIO, /* led hdd green */ - MPP42_GPIO, /* led hdd red */ - MPP44_GPIO, /* ?? */ - MPP46_GPIO, /* key power */ - MPP48_GPIO, /* control power off */ - 0 -}; - -static struct i2c_board_info __initdata nsa310_i2c_info[] = { - { I2C_BOARD_INFO("adt7476", 0x2e) }, -}; - -static void nsa310_power_off(void) -{ - gpio_set_value(NSA310_GPIO_POWER_OFF, 1); -} - -static int __init nsa310_gpio_request(unsigned int gpio, unsigned long flags, - const char *label) -{ - int err; - - err = gpio_request_one(gpio, flags, label); - if (err) - pr_err("NSA-310: can't setup GPIO%u (%s), err=%d\n", - gpio, label, err); - - return err; -} - -static void __init nsa310_gpio_init(void) -{ - int err; - - err = nsa310_gpio_request(NSA310_GPIO_POWER_OFF, GPIOF_OUT_INIT_LOW, - "Power Off"); - if (!err) - pm_power_off = nsa310_power_off; - - nsa310_gpio_request(NSA310_GPIO_USB_POWER_OFF, GPIOF_OUT_INIT_LOW, - "USB Power Off"); -} - -void __init nsa310_init(void) -{ - kirkwood_mpp_conf(nsa310_mpp_config); - - nsa310_gpio_init(); - - i2c_register_board_info(0, ARRAY_AND_SIZE(nsa310_i2c_info)); -} static int __init nsa310_pci_init(void) { diff --git a/arch/arm/mach-kirkwood/board-openblocks_a6.c b/arch/arm/mach-kirkwood/board-openblocks_a6.c index 815fc6451d5..b11d8fdeca9 100644 --- a/arch/arm/mach-kirkwood/board-openblocks_a6.c +++ b/arch/arm/mach-kirkwood/board-openblocks_a6.c @@ -11,60 +11,16 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/mv643xx_eth.h> -#include <linux/clk.h> -#include <linux/clk-private.h> #include "common.h" -#include "mpp.h" static struct mv643xx_eth_platform_data openblocks_ge00_data = { .phy_addr = MV643XX_ETH_PHY_ADDR(0), }; -static unsigned int openblocks_a6_mpp_config[] __initdata = { - MPP0_NF_IO2, - MPP1_NF_IO3, - MPP2_NF_IO4, - MPP3_NF_IO5, - MPP4_NF_IO6, - MPP5_NF_IO7, - MPP6_SYSRST_OUTn, - MPP8_UART1_RTS, - MPP9_UART1_CTS, - MPP10_UART0_TXD, - MPP11_UART0_RXD, - MPP13_UART1_TXD, - MPP14_UART1_RXD, - MPP15_UART0_RTS, - MPP16_UART0_CTS, - MPP18_NF_IO0, - MPP19_NF_IO1, - MPP20_GPIO, /* DIP SW0 */ - MPP21_GPIO, /* DIP SW1 */ - MPP22_GPIO, /* DIP SW2 */ - MPP23_GPIO, /* DIP SW3 */ - MPP24_GPIO, /* GPIO 0 */ - MPP25_GPIO, /* GPIO 1 */ - MPP26_GPIO, /* GPIO 2 */ - MPP27_GPIO, /* GPIO 3 */ - MPP28_GPIO, /* GPIO 4 */ - MPP29_GPIO, /* GPIO 5 */ - MPP30_GPIO, /* GPIO 6 */ - MPP31_GPIO, /* GPIO 7 */ - MPP36_TW1_SDA, - MPP37_TW1_SCK, - MPP38_GPIO, /* INIT */ - MPP39_GPIO, /* USB OC */ - MPP41_GPIO, /* LED: Red */ - MPP42_GPIO, /* LED: Green */ - MPP43_GPIO, /* LED: Yellow */ - 0, -}; - void __init openblocks_a6_init(void) { /* * Basic setup. Needs to be called early. */ - kirkwood_mpp_conf(openblocks_a6_mpp_config); kirkwood_ge00_init(&openblocks_ge00_data); } diff --git a/arch/arm/mach-kirkwood/board-usi_topkick.c b/arch/arm/mach-kirkwood/board-usi_topkick.c index 23d2dd1b1b1..1cc04ec33f0 100644 --- a/arch/arm/mach-kirkwood/board-usi_topkick.c +++ b/arch/arm/mach-kirkwood/board-usi_topkick.c @@ -14,64 +14,16 @@ #include <linux/init.h> #include <linux/mv643xx_eth.h> #include <linux/gpio.h> -#include <linux/platform_data/mmc-mvsdio.h> #include "common.h" -#include "mpp.h" static struct mv643xx_eth_platform_data topkick_ge00_data = { .phy_addr = MV643XX_ETH_PHY_ADDR(0), }; -static struct mvsdio_platform_data topkick_mvsdio_data = { - /* unfortunately the CD signal has not been connected */ -}; - -/* - * GPIO LED layout - * - * /-SYS_LED(2) - * | - * | /-DISK_LED - * | | - * | | /-WLAN_LED(2) - * | | | - * [SW] [*] [*] [*] - */ - -/* - * Switch positions - * - * /-SW_LEFT - * | - * | /-SW_IDLE - * | | - * | | /-SW_RIGHT - * | | | - * PS [L] [I] [R] LEDS - */ - -static unsigned int topkick_mpp_config[] __initdata = { - MPP21_GPIO, /* DISK_LED (low active) - yellow */ - MPP36_GPIO, /* SATA0 power enable (high active) */ - MPP37_GPIO, /* SYS_LED2 (low active) - red */ - MPP38_GPIO, /* SYS_LED (low active) - blue */ - MPP39_GPIO, /* WLAN_LED (low active) - green */ - MPP43_GPIO, /* SW_LEFT (low active) */ - MPP44_GPIO, /* SW_RIGHT (low active) */ - MPP45_GPIO, /* SW_IDLE (low active) */ - MPP46_GPIO, /* SW_LEFT (low active) */ - MPP48_GPIO, /* WLAN_LED2 (low active) - yellow */ - 0 -}; - void __init usi_topkick_init(void) { /* * Basic setup. Needs to be called early. */ - kirkwood_mpp_conf(topkick_mpp_config); - - kirkwood_ge00_init(&topkick_ge00_data); - kirkwood_sdio_init(&topkick_mvsdio_data); } diff --git a/arch/arm/mach-kirkwood/common.h b/arch/arm/mach-kirkwood/common.h index e956d0277dd..5ed70565c84 100644 --- a/arch/arm/mach-kirkwood/common.h +++ b/arch/arm/mach-kirkwood/common.h @@ -60,6 +60,11 @@ void dreamplug_init(void); #else static inline void dreamplug_init(void) {}; #endif +#ifdef CONFIG_MACH_GURUPLUG_DT +void guruplug_dt_init(void); +#else +static inline void guruplug_dt_init(void) {}; +#endif #ifdef CONFIG_MACH_TS219_DT void qnap_dt_ts219_init(void); #else @@ -130,12 +135,6 @@ void ns2_init(void); static inline void ns2_init(void) {}; #endif -#ifdef CONFIG_MACH_NSA310_DT -void nsa310_init(void); -#else -static inline void nsa310_init(void) {}; -#endif - #ifdef CONFIG_MACH_OPENBLOCKS_A6_DT void openblocks_a6_init(void); #else diff --git a/arch/arm/mach-mvebu/irq-armada-370-xp.c b/arch/arm/mach-mvebu/irq-armada-370-xp.c index 8e3fb082c3c..274ff58271d 100644 --- a/arch/arm/mach-mvebu/irq-armada-370-xp.c +++ b/arch/arm/mach-mvebu/irq-armada-370-xp.c @@ -34,6 +34,7 @@ #define ARMADA_370_XP_INT_CONTROL (0x00) #define ARMADA_370_XP_INT_SET_ENABLE_OFFS (0x30) #define ARMADA_370_XP_INT_CLEAR_ENABLE_OFFS (0x34) +#define ARMADA_370_XP_INT_SOURCE_CTL(irq) (0x100 + irq*4) #define ARMADA_370_XP_CPU_INTACK_OFFS (0x44) @@ -41,28 +42,90 @@ #define ARMADA_370_XP_IN_DRBEL_MSK_OFFS (0xc) #define ARMADA_370_XP_IN_DRBEL_CAUSE_OFFS (0x8) +#define ARMADA_370_XP_MAX_PER_CPU_IRQS (28) + #define ACTIVE_DOORBELLS (8) +static DEFINE_RAW_SPINLOCK(irq_controller_lock); + static void __iomem *per_cpu_int_base; static void __iomem *main_int_base; static struct irq_domain *armada_370_xp_mpic_domain; +/* + * In SMP mode: + * For shared global interrupts, mask/unmask global enable bit + * For CPU interrtups, mask/unmask the calling CPU's bit + */ static void armada_370_xp_irq_mask(struct irq_data *d) { +#ifdef CONFIG_SMP + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + if (hwirq > ARMADA_370_XP_MAX_PER_CPU_IRQS) + writel(hwirq, main_int_base + + ARMADA_370_XP_INT_CLEAR_ENABLE_OFFS); + else + writel(hwirq, per_cpu_int_base + + ARMADA_370_XP_INT_SET_MASK_OFFS); +#else writel(irqd_to_hwirq(d), per_cpu_int_base + ARMADA_370_XP_INT_SET_MASK_OFFS); +#endif } static void armada_370_xp_irq_unmask(struct irq_data *d) { +#ifdef CONFIG_SMP + irq_hw_number_t hwirq = irqd_to_hwirq(d); + + if (hwirq > ARMADA_370_XP_MAX_PER_CPU_IRQS) + writel(hwirq, main_int_base + + ARMADA_370_XP_INT_SET_ENABLE_OFFS); + else + writel(hwirq, per_cpu_int_base + + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); +#else writel(irqd_to_hwirq(d), per_cpu_int_base + ARMADA_370_XP_INT_CLEAR_MASK_OFFS); +#endif } #ifdef CONFIG_SMP static int armada_xp_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { + unsigned long reg; + unsigned long new_mask = 0; + unsigned long online_mask = 0; + unsigned long count = 0; + irq_hw_number_t hwirq = irqd_to_hwirq(d); + int cpu; + + for_each_cpu(cpu, mask_val) { + new_mask |= 1 << cpu_logical_map(cpu); + count++; + } + + /* + * Forbid mutlicore interrupt affinity + * This is required since the MPIC HW doesn't limit + * several CPUs from acknowledging the same interrupt. + */ + if (count > 1) + return -EINVAL; + + for_each_cpu(cpu, cpu_online_mask) + online_mask |= 1 << cpu_logical_map(cpu); + + raw_spin_lock(&irq_controller_lock); + + reg = readl(main_int_base + ARMADA_370_XP_INT_SOURCE_CTL(hwirq)); + reg = (reg & (~online_mask)) | new_mask; + writel(reg, main_int_base + ARMADA_370_XP_INT_SOURCE_CTL(hwirq)); + + raw_spin_unlock(&irq_controller_lock); + return 0; } #endif @@ -82,10 +145,17 @@ static int armada_370_xp_mpic_irq_map(struct irq_domain *h, { armada_370_xp_irq_mask(irq_get_irq_data(virq)); writel(hw, main_int_base + ARMADA_370_XP_INT_SET_ENABLE_OFFS); - - irq_set_chip_and_handler(virq, &armada_370_xp_irq_chip, - handle_level_irq); irq_set_status_flags(virq, IRQ_LEVEL); + + if (hw < ARMADA_370_XP_MAX_PER_CPU_IRQS) { + irq_set_percpu_devid(virq); + irq_set_chip_and_handler(virq, &armada_370_xp_irq_chip, + handle_percpu_devid_irq); + + } else { + irq_set_chip_and_handler(virq, &armada_370_xp_irq_chip, + handle_level_irq); + } set_irq_flags(virq, IRQF_VALID | IRQF_PROBE); return 0; @@ -155,6 +225,15 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node, #ifdef CONFIG_SMP armada_xp_mpic_smp_cpu_init(); + + /* + * Set the default affinity from all CPUs to the boot cpu. + * This is required since the MPIC doesn't limit several CPUs + * from acknowledging the same interrupt. + */ + cpumask_clear(irq_default_affinity); + cpumask_set_cpu(smp_processor_id(), irq_default_affinity); + #endif return 0; @@ -173,7 +252,7 @@ asmlinkage void __exception_irq_entry armada_370_xp_handle_irq(struct pt_regs if (irqnr > 1022) break; - if (irqnr >= 8) { + if (irqnr > 0) { irqnr = irq_find_mapping(armada_370_xp_mpic_domain, irqnr); handle_IRQ(irqnr, regs); diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile index ff528df7011..b068b7fe99e 100644 --- a/arch/arm/mach-omap2/Makefile +++ b/arch/arm/mach-omap2/Makefile @@ -11,7 +11,7 @@ obj-y := id.o io.o control.o mux.o devices.o fb.o serial.o gpmc.o timer.o pm.o \ omap_device.o sram.o omap-2-3-common = irq.o -hwmod-common = omap_hwmod.o \ +hwmod-common = omap_hwmod.o omap_hwmod_reset.o \ omap_hwmod_common_data.o clock-common = clock.o clock_common_data.o \ clkt_dpll.o clkt_clksel.o @@ -56,6 +56,7 @@ AFLAGS_sram34xx.o :=-Wa,-march=armv7-a # Restart code (OMAP4/5 currently in omap4-common.c) obj-$(CONFIG_SOC_OMAP2420) += omap2-restart.o obj-$(CONFIG_SOC_OMAP2430) += omap2-restart.o +obj-$(CONFIG_SOC_AM33XX) += am33xx-restart.o obj-$(CONFIG_ARCH_OMAP3) += omap3-restart.o # Pin multiplexing diff --git a/arch/arm/mach-omap2/am33xx-restart.c b/arch/arm/mach-omap2/am33xx-restart.c new file mode 100644 index 00000000000..88e4fa8af03 --- /dev/null +++ b/arch/arm/mach-omap2/am33xx-restart.c @@ -0,0 +1,34 @@ +/* + * am33xx-restart.c - Code common to all AM33xx machines. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/kernel.h> + +#include "common.h" +#include "prm-regbits-33xx.h" +#include "prm33xx.h" + +/** + * am3xx_restart - trigger a software restart of the SoC + * @mode: the "reboot mode", see arch/arm/kernel/{setup,process}.c + * @cmd: passed from the userspace program rebooting the system (if provided) + * + * Resets the SoC. For @cmd, see the 'reboot' syscall in + * kernel/sys.c. No return value. + */ +void am33xx_restart(char mode, const char *cmd) +{ + /* TODO: Handle mode and cmd if necessary */ + + am33xx_prm_rmw_reg_bits(AM33XX_GLOBAL_WARM_SW_RST_MASK, + AM33XX_GLOBAL_WARM_SW_RST_MASK, + AM33XX_PRM_DEVICE_MOD, + AM33XX_PRM_RSTCTRL_OFFSET); + + /* OCP barrier */ + (void)am33xx_prm_read_reg(AM33XX_PRM_DEVICE_MOD, + AM33XX_PRM_RSTCTRL_OFFSET); +} diff --git a/arch/arm/mach-omap2/am35xx-emac.c b/arch/arm/mach-omap2/am35xx-emac.c index a00d39107a2..25b79a29736 100644 --- a/arch/arm/mach-omap2/am35xx-emac.c +++ b/arch/arm/mach-omap2/am35xx-emac.c @@ -62,8 +62,7 @@ static int __init omap_davinci_emac_dev_init(struct omap_hwmod *oh, { struct platform_device *pdev; - pdev = omap_device_build(oh->class->name, 0, oh, pdata, pdata_len, - false); + pdev = omap_device_build(oh->class->name, 0, oh, pdata, pdata_len); if (IS_ERR(pdev)) { WARN(1, "Can't build omap_device for %s:%s.\n", oh->class->name, oh->name); diff --git a/arch/arm/mach-omap2/board-generic.c b/arch/arm/mach-omap2/board-generic.c index 2590463e4b5..0274ff7a2a2 100644 --- a/arch/arm/mach-omap2/board-generic.c +++ b/arch/arm/mach-omap2/board-generic.c @@ -140,6 +140,7 @@ DT_MACHINE_START(AM33XX_DT, "Generic AM33XX (Flattened Device Tree)") .init_machine = omap_generic_init, .init_time = omap3_am33xx_gptimer_timer_init, .dt_compat = am33xx_boards_compat, + .restart = am33xx_restart, MACHINE_END #endif diff --git a/arch/arm/mach-omap2/cclock33xx_data.c b/arch/arm/mach-omap2/cclock33xx_data.c index ea64ad60675..476b82066cb 100644 --- a/arch/arm/mach-omap2/cclock33xx_data.c +++ b/arch/arm/mach-omap2/cclock33xx_data.c @@ -284,9 +284,10 @@ DEFINE_STRUCT_CLK(dpll_disp_ck, dpll_core_ck_parents, dpll_ddr_ck_ops); * TODO: Add clksel here (sys_clkin, CORE_CLKOUTM6, PER_CLKOUTM2 * and ALT_CLK1/2) */ -DEFINE_CLK_DIVIDER(dpll_disp_m2_ck, "dpll_disp_ck", &dpll_disp_ck, 0x0, - AM33XX_CM_DIV_M2_DPLL_DISP, AM33XX_DPLL_CLKOUT_DIV_SHIFT, - AM33XX_DPLL_CLKOUT_DIV_WIDTH, CLK_DIVIDER_ONE_BASED, NULL); +DEFINE_CLK_DIVIDER(dpll_disp_m2_ck, "dpll_disp_ck", &dpll_disp_ck, + CLK_SET_RATE_PARENT, AM33XX_CM_DIV_M2_DPLL_DISP, + AM33XX_DPLL_CLKOUT_DIV_SHIFT, AM33XX_DPLL_CLKOUT_DIV_WIDTH, + CLK_DIVIDER_ONE_BASED, NULL); /* DPLL_PER */ static struct dpll_data dpll_per_dd = { @@ -723,7 +724,8 @@ static struct clk_hw_omap lcd_gclk_hw = { .clksel_mask = AM33XX_CLKSEL_0_1_MASK, }; -DEFINE_STRUCT_CLK(lcd_gclk, lcd_ck_parents, gpio_fck_ops); +DEFINE_STRUCT_CLK_FLAGS(lcd_gclk, lcd_ck_parents, + gpio_fck_ops, CLK_SET_RATE_PARENT); DEFINE_CLK_FIXED_FACTOR(mmc_clk, "dpll_per_m2_ck", &dpll_per_m2_ck, 0x0, 1, 2); diff --git a/arch/arm/mach-omap2/cclock3xxx_data.c b/arch/arm/mach-omap2/cclock3xxx_data.c index 6ef87580c33..4579c3c5338 100644 --- a/arch/arm/mach-omap2/cclock3xxx_data.c +++ b/arch/arm/mach-omap2/cclock3xxx_data.c @@ -426,6 +426,7 @@ static struct clk dpll4_m5x2_ck_3630 = { .parent_names = dpll4_m5x2_ck_parent_names, .num_parents = ARRAY_SIZE(dpll4_m5x2_ck_parent_names), .ops = &dpll4_m5x2_ck_3630_ops, + .flags = CLK_SET_RATE_PARENT, }; static struct clk cam_mclk; @@ -443,7 +444,14 @@ static struct clk_hw_omap cam_mclk_hw = { .clkdm_name = "cam_clkdm", }; -DEFINE_STRUCT_CLK(cam_mclk, cam_mclk_parent_names, aes2_ick_ops); +static struct clk cam_mclk = { + .name = "cam_mclk", + .hw = &cam_mclk_hw.hw, + .parent_names = cam_mclk_parent_names, + .num_parents = ARRAY_SIZE(cam_mclk_parent_names), + .ops = &aes2_ick_ops, + .flags = CLK_SET_RATE_PARENT, +}; static const struct clksel_rate clkout2_src_core_rates[] = { { .div = 1, .val = 0, .flags = RATE_IN_3XXX }, diff --git a/arch/arm/mach-omap2/cclock44xx_data.c b/arch/arm/mach-omap2/cclock44xx_data.c index cebe2b31943..3d58f335f17 100644 --- a/arch/arm/mach-omap2/cclock44xx_data.c +++ b/arch/arm/mach-omap2/cclock44xx_data.c @@ -605,15 +605,26 @@ static const char *dpll_usb_ck_parents[] = { static struct clk dpll_usb_ck; +static const struct clk_ops dpll_usb_ck_ops = { + .enable = &omap3_noncore_dpll_enable, + .disable = &omap3_noncore_dpll_disable, + .recalc_rate = &omap3_dpll_recalc, + .round_rate = &omap2_dpll_round_rate, + .set_rate = &omap3_noncore_dpll_set_rate, + .get_parent = &omap2_init_dpll_parent, + .init = &omap2_init_clk_clkdm, +}; + static struct clk_hw_omap dpll_usb_ck_hw = { .hw = { .clk = &dpll_usb_ck, }, .dpll_data = &dpll_usb_dd, + .clkdm_name = "l3_init_clkdm", .ops = &clkhwops_omap3_dpll, }; -DEFINE_STRUCT_CLK(dpll_usb_ck, dpll_usb_ck_parents, dpll_ck_ops); +DEFINE_STRUCT_CLK(dpll_usb_ck, dpll_usb_ck_parents, dpll_usb_ck_ops); static const char *dpll_usb_clkdcoldo_ck_parents[] = { "dpll_usb_ck", diff --git a/arch/arm/mach-omap2/clock.h b/arch/arm/mach-omap2/clock.h index b40204837bd..60ddd8612b4 100644 --- a/arch/arm/mach-omap2/clock.h +++ b/arch/arm/mach-omap2/clock.h @@ -65,6 +65,17 @@ struct clockdomain; .ops = &_clkops_name, \ }; +#define DEFINE_STRUCT_CLK_FLAGS(_name, _parent_array_name, \ + _clkops_name, _flags) \ + static struct clk _name = { \ + .name = #_name, \ + .hw = &_name##_hw.hw, \ + .parent_names = _parent_array_name, \ + .num_parents = ARRAY_SIZE(_parent_array_name), \ + .ops = &_clkops_name, \ + .flags = _flags, \ + }; + #define DEFINE_STRUCT_CLK_HW_OMAP(_name, _clkdm_name) \ static struct clk_hw_omap _name##_hw = { \ .hw = { \ diff --git a/arch/arm/mach-omap2/cm33xx.c b/arch/arm/mach-omap2/cm33xx.c index 058ce3c0873..325a5157657 100644 --- a/arch/arm/mach-omap2/cm33xx.c +++ b/arch/arm/mach-omap2/cm33xx.c @@ -241,9 +241,6 @@ int am33xx_cm_wait_module_ready(u16 inst, s16 cdoffs, u16 clkctrl_offs) { int i = 0; - if (!clkctrl_offs) - return 0; - omap_test_timeout(_is_module_ready(inst, cdoffs, clkctrl_offs), MAX_MODULE_READY_TIME, i); diff --git a/arch/arm/mach-omap2/cm33xx.h b/arch/arm/mach-omap2/cm33xx.h index 5fa0b62e1a7..64f4bafe7bd 100644 --- a/arch/arm/mach-omap2/cm33xx.h +++ b/arch/arm/mach-omap2/cm33xx.h @@ -17,16 +17,11 @@ #ifndef __ARCH_ARM_MACH_OMAP2_CM_33XX_H #define __ARCH_ARM_MACH_OMAP2_CM_33XX_H -#include <linux/delay.h> -#include <linux/errno.h> -#include <linux/err.h> -#include <linux/io.h> - #include "common.h" #include "cm.h" #include "cm-regbits-33xx.h" -#include "cm33xx.h" +#include "iomap.h" /* CM base address */ #define AM33XX_CM_BASE 0x44e00000 @@ -381,6 +376,7 @@ #define AM33XX_CM_CEFUSE_CEFUSE_CLKCTRL AM33XX_CM_REGADDR(AM33XX_CM_CEFUSE_MOD, 0x0020) +#ifndef __ASSEMBLER__ extern bool am33xx_cm_is_clkdm_in_hwsup(s16 inst, u16 cdoffs); extern void am33xx_cm_clkdm_enable_hwsup(s16 inst, u16 cdoffs); extern void am33xx_cm_clkdm_disable_hwsup(s16 inst, u16 cdoffs); @@ -417,4 +413,5 @@ static inline int am33xx_cm_wait_module_ready(u16 inst, s16 cdoffs, } #endif +#endif /* ASSEMBLER */ #endif diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index b4350274361..0a6b9c7a63d 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -119,6 +119,14 @@ static inline void omap2xxx_restart(char mode, const char *cmd) } #endif +#ifdef CONFIG_SOC_AM33XX +void am33xx_restart(char mode, const char *cmd); +#else +static inline void am33xx_restart(char mode, const char *cmd) +{ +} +#endif + #ifdef CONFIG_ARCH_OMAP3 void omap3xxx_restart(char mode, const char *cmd); #else diff --git a/arch/arm/mach-omap2/devices.c b/arch/arm/mach-omap2/devices.c index 142d9c616f1..1ec7f059771 100644 --- a/arch/arm/mach-omap2/devices.c +++ b/arch/arm/mach-omap2/devices.c @@ -426,7 +426,7 @@ static void __init omap_init_hdmi_audio(void) return; } - pdev = omap_device_build("omap-hdmi-audio-dai", -1, oh, NULL, 0, 0); + pdev = omap_device_build("omap-hdmi-audio-dai", -1, oh, NULL, 0); WARN(IS_ERR(pdev), "Can't build omap_device for omap-hdmi-audio-dai.\n"); diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c index 0a02aab5df6..3aed4b0b956 100644 --- a/arch/arm/mach-omap2/dpll3xxx.c +++ b/arch/arm/mach-omap2/dpll3xxx.c @@ -500,8 +500,9 @@ int omap3_noncore_dpll_set_rate(struct clk_hw *hw, unsigned long rate, if (dd->last_rounded_rate == 0) return -EINVAL; - /* No freqsel on OMAP4 and OMAP3630 */ - if (!cpu_is_omap44xx() && !cpu_is_omap3630()) { + /* No freqsel on AM335x, OMAP4 and OMAP3630 */ + if (!soc_is_am33xx() && !cpu_is_omap44xx() && + !cpu_is_omap3630()) { freqsel = _omap3_dpll_compute_freqsel(clk, dd->last_rounded_n); WARN_ON(!freqsel); diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c index 45cc7ed4dd5..8a68f1ec66b 100644 --- a/arch/arm/mach-omap2/id.c +++ b/arch/arm/mach-omap2/id.c @@ -399,8 +399,18 @@ void __init omap3xxx_check_revision(void) } break; case 0xb944: - omap_revision = AM335X_REV_ES1_0; - cpu_rev = "1.0"; + switch (rev) { + case 0: + omap_revision = AM335X_REV_ES1_0; + cpu_rev = "1.0"; + break; + case 1: + /* FALLTHROUGH */ + default: + omap_revision = AM335X_REV_ES2_0; + cpu_rev = "2.0"; + break; + } break; case 0xb8f2: switch (rev) { diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index a8984989dec..c2c798c08c2 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -2055,6 +2055,23 @@ static int _omap4_get_context_lost(struct omap_hwmod *oh) } /** + * _enable_preprogram - Pre-program an IP block during the _enable() process + * @oh: struct omap_hwmod * + * + * Some IP blocks (such as AESS) require some additional programming + * after enable before they can enter idle. If a function pointer to + * do so is present in the hwmod data, then call it and pass along the + * return value; otherwise, return 0. + */ +static int __init _enable_preprogram(struct omap_hwmod *oh) +{ + if (!oh->class->enable_preprogram) + return 0; + + return oh->class->enable_preprogram(oh); +} + +/** * _enable - enable an omap_hwmod * @oh: struct omap_hwmod * * @@ -2160,6 +2177,7 @@ static int _enable(struct omap_hwmod *oh) _update_sysc_cache(oh); _enable_sysc(oh); } + r = _enable_preprogram(oh); } else { if (soc_ops.disable_module) soc_ops.disable_module(oh); @@ -3049,11 +3067,8 @@ static int _am33xx_assert_hardreset(struct omap_hwmod *oh, static int _am33xx_deassert_hardreset(struct omap_hwmod *oh, struct omap_hwmod_rst_info *ohri) { - if (ohri->st_shift) - pr_err("omap_hwmod: %s: %s: hwmod data error: OMAP4 does not support st_shift\n", - oh->name, ohri->name); - return am33xx_prm_deassert_hardreset(ohri->rst_shift, + ohri->st_shift, oh->clkdm->pwrdm.ptr->prcm_offs, oh->prcm.omap4.rstctrl_offs, oh->prcm.omap4.rstst_offs); diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h index 80c00e706d6..d43d9b608ed 100644 --- a/arch/arm/mach-omap2/omap_hwmod.h +++ b/arch/arm/mach-omap2/omap_hwmod.h @@ -510,6 +510,7 @@ struct omap_hwmod_omap4_prcm { * @rev: revision of the IP class * @pre_shutdown: ptr to fn to be executed immediately prior to device shutdown * @reset: ptr to fn to be executed in place of the standard hwmod reset fn + * @enable_preprogram: ptr to fn to be executed during device enable * * Represent the class of a OMAP hardware "modules" (e.g. timer, * smartreflex, gpio, uart...) @@ -533,6 +534,7 @@ struct omap_hwmod_class { u32 rev; int (*pre_shutdown)(struct omap_hwmod *oh); int (*reset)(struct omap_hwmod *oh); + int (*enable_preprogram)(struct omap_hwmod *oh); }; /** @@ -680,6 +682,12 @@ extern void __init omap_hwmod_init(void); const char *omap_hwmod_get_main_clk(struct omap_hwmod *oh); /* + * + */ + +extern int omap_hwmod_aess_preprogram(struct omap_hwmod *oh); + +/* * Chip variant-specific hwmod init routines - XXX should be converted * to use initcalls once the initial boot ordering is straightened out */ diff --git a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c index 646c14d9fdb..26eee4a556a 100644 --- a/arch/arm/mach-omap2/omap_hwmod_33xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_33xx_data.c @@ -262,13 +262,15 @@ static struct omap_hwmod am33xx_wkup_m3_hwmod = { .name = "wkup_m3", .class = &am33xx_wkup_m3_hwmod_class, .clkdm_name = "l4_wkup_aon_clkdm", - .flags = HWMOD_INIT_NO_RESET, /* Keep hardreset asserted */ + /* Keep hardreset asserted */ + .flags = HWMOD_INIT_NO_RESET | HWMOD_NO_IDLEST, .mpu_irqs = am33xx_wkup_m3_irqs, .main_clk = "dpll_core_m4_div2_ck", .prcm = { .omap4 = { .clkctrl_offs = AM33XX_CM_WKUP_WKUP_M3_CLKCTRL_OFFSET, .rstctrl_offs = AM33XX_RM_WKUP_RSTCTRL_OFFSET, + .rstst_offs = AM33XX_RM_WKUP_RSTST_OFFSET, .modulemode = MODULEMODE_SWCTRL, }, }, @@ -414,7 +416,6 @@ static struct omap_hwmod am33xx_adc_tsc_hwmod = { * - cEFUSE (doesn't fall under any ocp_if) * - clkdiv32k * - debugss - * - ocmc ram * - ocp watch point * - aes0 * - sha0 @@ -481,25 +482,6 @@ static struct omap_hwmod am33xx_debugss_hwmod = { }, }; -/* ocmcram */ -static struct omap_hwmod_class am33xx_ocmcram_hwmod_class = { - .name = "ocmcram", -}; - -static struct omap_hwmod am33xx_ocmcram_hwmod = { - .name = "ocmcram", - .class = &am33xx_ocmcram_hwmod_class, - .clkdm_name = "l3_clkdm", - .flags = (HWMOD_INIT_NO_IDLE | HWMOD_INIT_NO_RESET), - .main_clk = "l3_gclk", - .prcm = { - .omap4 = { - .clkctrl_offs = AM33XX_CM_PER_OCMCRAM_CLKCTRL_OFFSET, - .modulemode = MODULEMODE_SWCTRL, - }, - }, -}; - /* ocpwp */ static struct omap_hwmod_class am33xx_ocpwp_hwmod_class = { .name = "ocpwp", @@ -570,6 +552,25 @@ static struct omap_hwmod am33xx_sha0_hwmod = { #endif +/* ocmcram */ +static struct omap_hwmod_class am33xx_ocmcram_hwmod_class = { + .name = "ocmcram", +}; + +static struct omap_hwmod am33xx_ocmcram_hwmod = { + .name = "ocmcram", + .class = &am33xx_ocmcram_hwmod_class, + .clkdm_name = "l3_clkdm", + .flags = (HWMOD_INIT_NO_IDLE | HWMOD_INIT_NO_RESET), + .main_clk = "l3_gclk", + .prcm = { + .omap4 = { + .clkctrl_offs = AM33XX_CM_PER_OCMCRAM_CLKCTRL_OFFSET, + .modulemode = MODULEMODE_SWCTRL, + }, + }, +}; + /* 'smartreflex' class */ static struct omap_hwmod_class am33xx_smartreflex_hwmod_class = { .name = "smartreflex", @@ -783,9 +784,7 @@ static struct omap_hwmod am33xx_elm_hwmod = { }, }; -/* - * 'epwmss' class: ecap0,1,2, ehrpwm0,1,2 - */ +/* pwmss */ static struct omap_hwmod_class_sysconfig am33xx_epwmss_sysc = { .rev_offs = 0x0, .sysc_offs = 0x4, @@ -801,18 +800,23 @@ static struct omap_hwmod_class am33xx_epwmss_hwmod_class = { .sysc = &am33xx_epwmss_sysc, }; -/* ehrpwm0 */ -static struct omap_hwmod_irq_info am33xx_ehrpwm0_irqs[] = { - { .name = "int", .irq = 86 + OMAP_INTC_START, }, - { .name = "tzint", .irq = 58 + OMAP_INTC_START, }, - { .irq = -1 }, +static struct omap_hwmod_class am33xx_ecap_hwmod_class = { + .name = "ecap", }; -static struct omap_hwmod am33xx_ehrpwm0_hwmod = { - .name = "ehrpwm0", +static struct omap_hwmod_class am33xx_eqep_hwmod_class = { + .name = "eqep", +}; + +static struct omap_hwmod_class am33xx_ehrpwm_hwmod_class = { + .name = "ehrpwm", +}; + +/* epwmss0 */ +static struct omap_hwmod am33xx_epwmss0_hwmod = { + .name = "epwmss0", .class = &am33xx_epwmss_hwmod_class, .clkdm_name = "l4ls_clkdm", - .mpu_irqs = am33xx_ehrpwm0_irqs, .main_clk = "l4ls_gclk", .prcm = { .omap4 = { @@ -822,63 +826,58 @@ static struct omap_hwmod am33xx_ehrpwm0_hwmod = { }, }; -/* ehrpwm1 */ -static struct omap_hwmod_irq_info am33xx_ehrpwm1_irqs[] = { - { .name = "int", .irq = 87 + OMAP_INTC_START, }, - { .name = "tzint", .irq = 59 + OMAP_INTC_START, }, +/* ecap0 */ +static struct omap_hwmod_irq_info am33xx_ecap0_irqs[] = { + { .irq = 31 + OMAP_INTC_START, }, { .irq = -1 }, }; -static struct omap_hwmod am33xx_ehrpwm1_hwmod = { - .name = "ehrpwm1", - .class = &am33xx_epwmss_hwmod_class, +static struct omap_hwmod am33xx_ecap0_hwmod = { + .name = "ecap0", + .class = &am33xx_ecap_hwmod_class, .clkdm_name = "l4ls_clkdm", - .mpu_irqs = am33xx_ehrpwm1_irqs, + .mpu_irqs = am33xx_ecap0_irqs, .main_clk = "l4ls_gclk", - .prcm = { - .omap4 = { - .clkctrl_offs = AM33XX_CM_PER_EPWMSS1_CLKCTRL_OFFSET, - .modulemode = MODULEMODE_SWCTRL, - }, - }, }; -/* ehrpwm2 */ -static struct omap_hwmod_irq_info am33xx_ehrpwm2_irqs[] = { - { .name = "int", .irq = 39 + OMAP_INTC_START, }, - { .name = "tzint", .irq = 60 + OMAP_INTC_START, }, +/* eqep0 */ +static struct omap_hwmod_irq_info am33xx_eqep0_irqs[] = { + { .irq = 79 + OMAP_INTC_START, }, { .irq = -1 }, }; -static struct omap_hwmod am33xx_ehrpwm2_hwmod = { - .name = "ehrpwm2", - .class = &am33xx_epwmss_hwmod_class, +static struct omap_hwmod am33xx_eqep0_hwmod = { + .name = "eqep0", + .class = &am33xx_eqep_hwmod_class, .clkdm_name = "l4ls_clkdm", - .mpu_irqs = am33xx_ehrpwm2_irqs, + .mpu_irqs = am33xx_eqep0_irqs, .main_clk = "l4ls_gclk", - .prcm = { - .omap4 = { - .clkctrl_offs = AM33XX_CM_PER_EPWMSS2_CLKCTRL_OFFSET, - .modulemode = MODULEMODE_SWCTRL, - }, - }, }; -/* ecap0 */ -static struct omap_hwmod_irq_info am33xx_ecap0_irqs[] = { - { .irq = 31 + OMAP_INTC_START, }, +/* ehrpwm0 */ +static struct omap_hwmod_irq_info am33xx_ehrpwm0_irqs[] = { + { .name = "int", .irq = 86 + OMAP_INTC_START, }, + { .name = "tzint", .irq = 58 + OMAP_INTC_START, }, { .irq = -1 }, }; -static struct omap_hwmod am33xx_ecap0_hwmod = { - .name = "ecap0", +static struct omap_hwmod am33xx_ehrpwm0_hwmod = { + .name = "ehrpwm0", + .class = &am33xx_ehrpwm_hwmod_class, + .clkdm_name = "l4ls_clkdm", + .mpu_irqs = am33xx_ehrpwm0_irqs, + .main_clk = "l4ls_gclk", +}; + +/* epwmss1 */ +static struct omap_hwmod am33xx_epwmss1_hwmod = { + .name = "epwmss1", .class = &am33xx_epwmss_hwmod_class, .clkdm_name = "l4ls_clkdm", - .mpu_irqs = am33xx_ecap0_irqs, .main_clk = "l4ls_gclk", .prcm = { .omap4 = { - .clkctrl_offs = AM33XX_CM_PER_EPWMSS0_CLKCTRL_OFFSET, + .clkctrl_offs = AM33XX_CM_PER_EPWMSS1_CLKCTRL_OFFSET, .modulemode = MODULEMODE_SWCTRL, }, }, @@ -892,13 +891,50 @@ static struct omap_hwmod_irq_info am33xx_ecap1_irqs[] = { static struct omap_hwmod am33xx_ecap1_hwmod = { .name = "ecap1", - .class = &am33xx_epwmss_hwmod_class, + .class = &am33xx_ecap_hwmod_class, .clkdm_name = "l4ls_clkdm", .mpu_irqs = am33xx_ecap1_irqs, .main_clk = "l4ls_gclk", +}; + +/* eqep1 */ +static struct omap_hwmod_irq_info am33xx_eqep1_irqs[] = { + { .irq = 88 + OMAP_INTC_START, }, + { .irq = -1 }, +}; + +static struct omap_hwmod am33xx_eqep1_hwmod = { + .name = "eqep1", + .class = &am33xx_eqep_hwmod_class, + .clkdm_name = "l4ls_clkdm", + .mpu_irqs = am33xx_eqep1_irqs, + .main_clk = "l4ls_gclk", +}; + +/* ehrpwm1 */ +static struct omap_hwmod_irq_info am33xx_ehrpwm1_irqs[] = { + { .name = "int", .irq = 87 + OMAP_INTC_START, }, + { .name = "tzint", .irq = 59 + OMAP_INTC_START, }, + { .irq = -1 }, +}; + +static struct omap_hwmod am33xx_ehrpwm1_hwmod = { + .name = "ehrpwm1", + .class = &am33xx_ehrpwm_hwmod_class, + .clkdm_name = "l4ls_clkdm", + .mpu_irqs = am33xx_ehrpwm1_irqs, + .main_clk = "l4ls_gclk", +}; + +/* epwmss2 */ +static struct omap_hwmod am33xx_epwmss2_hwmod = { + .name = "epwmss2", + .class = &am33xx_epwmss_hwmod_class, + .clkdm_name = "l4ls_clkdm", + .main_clk = "l4ls_gclk", .prcm = { .omap4 = { - .clkctrl_offs = AM33XX_CM_PER_EPWMSS1_CLKCTRL_OFFSET, + .clkctrl_offs = AM33XX_CM_PER_EPWMSS2_CLKCTRL_OFFSET, .modulemode = MODULEMODE_SWCTRL, }, }, @@ -912,16 +948,39 @@ static struct omap_hwmod_irq_info am33xx_ecap2_irqs[] = { static struct omap_hwmod am33xx_ecap2_hwmod = { .name = "ecap2", + .class = &am33xx_ecap_hwmod_class, + .clkdm_name = "l4ls_clkdm", .mpu_irqs = am33xx_ecap2_irqs, - .class = &am33xx_epwmss_hwmod_class, + .main_clk = "l4ls_gclk", +}; + +/* eqep2 */ +static struct omap_hwmod_irq_info am33xx_eqep2_irqs[] = { + { .irq = 89 + OMAP_INTC_START, }, + { .irq = -1 }, +}; + +static struct omap_hwmod am33xx_eqep2_hwmod = { + .name = "eqep2", + .class = &am33xx_eqep_hwmod_class, .clkdm_name = "l4ls_clkdm", + .mpu_irqs = am33xx_eqep2_irqs, + .main_clk = "l4ls_gclk", +}; + +/* ehrpwm2 */ +static struct omap_hwmod_irq_info am33xx_ehrpwm2_irqs[] = { + { .name = "int", .irq = 39 + OMAP_INTC_START, }, + { .name = "tzint", .irq = 60 + OMAP_INTC_START, }, + { .irq = -1 }, +}; + +static struct omap_hwmod am33xx_ehrpwm2_hwmod = { + .name = "ehrpwm2", + .class = &am33xx_ehrpwm_hwmod_class, + .clkdm_name = "l4ls_clkdm", + .mpu_irqs = am33xx_ehrpwm2_irqs, .main_clk = "l4ls_gclk", - .prcm = { - .omap4 = { - .clkctrl_offs = AM33XX_CM_PER_EPWMSS2_CLKCTRL_OFFSET, - .modulemode = MODULEMODE_SWCTRL, - }, - }, }; /* @@ -1824,6 +1883,7 @@ static struct omap_hwmod am33xx_tptc0_hwmod = { .class = &am33xx_tptc_hwmod_class, .clkdm_name = "l3_clkdm", .mpu_irqs = am33xx_tptc0_irqs, + .flags = HWMOD_SWSUP_SIDLE | HWMOD_SWSUP_MSTANDBY, .main_clk = "l3_gclk", .prcm = { .omap4 = { @@ -2496,7 +2556,6 @@ static struct omap_hwmod_addr_space am33xx_cpgmac0_addr_space[] = { { .pa_start = 0x4a100000, .pa_end = 0x4a100000 + SZ_2K - 1, - .flags = ADDR_TYPE_RT, }, /* cpsw wr */ { @@ -2547,162 +2606,202 @@ static struct omap_hwmod_ocp_if am33xx_l4_ls__elm = { .user = OCP_USER_MPU, }; -/* - * Splitting the resources to handle access of PWMSS config space - * and module specific part independently - */ -static struct omap_hwmod_addr_space am33xx_ehrpwm0_addr_space[] = { +static struct omap_hwmod_addr_space am33xx_epwmss0_addr_space[] = { { .pa_start = 0x48300000, .pa_end = 0x48300000 + SZ_16 - 1, .flags = ADDR_TYPE_RT }, - { - .pa_start = 0x48300200, - .pa_end = 0x48300200 + SZ_256 - 1, - .flags = ADDR_TYPE_RT - }, { } }; -static struct omap_hwmod_ocp_if am33xx_l4_ls__ehrpwm0 = { +static struct omap_hwmod_ocp_if am33xx_l4_ls__epwmss0 = { .master = &am33xx_l4_ls_hwmod, - .slave = &am33xx_ehrpwm0_hwmod, + .slave = &am33xx_epwmss0_hwmod, .clk = "l4ls_gclk", - .addr = am33xx_ehrpwm0_addr_space, + .addr = am33xx_epwmss0_addr_space, .user = OCP_USER_MPU, }; -/* - * Splitting the resources to handle access of PWMSS config space - * and module specific part independently - */ -static struct omap_hwmod_addr_space am33xx_ehrpwm1_addr_space[] = { - { - .pa_start = 0x48302000, - .pa_end = 0x48302000 + SZ_16 - 1, - .flags = ADDR_TYPE_RT - }, +static struct omap_hwmod_addr_space am33xx_ecap0_addr_space[] = { { - .pa_start = 0x48302200, - .pa_end = 0x48302200 + SZ_256 - 1, - .flags = ADDR_TYPE_RT + .pa_start = 0x48300100, + .pa_end = 0x48300100 + SZ_128 - 1, }, { } }; -static struct omap_hwmod_ocp_if am33xx_l4_ls__ehrpwm1 = { - .master = &am33xx_l4_ls_hwmod, - .slave = &am33xx_ehrpwm1_hwmod, +static struct omap_hwmod_ocp_if am33xx_epwmss0__ecap0 = { + .master = &am33xx_epwmss0_hwmod, + .slave = &am33xx_ecap0_hwmod, .clk = "l4ls_gclk", - .addr = am33xx_ehrpwm1_addr_space, + .addr = am33xx_ecap0_addr_space, .user = OCP_USER_MPU, }; -/* - * Splitting the resources to handle access of PWMSS config space - * and module specific part independently - */ -static struct omap_hwmod_addr_space am33xx_ehrpwm2_addr_space[] = { +static struct omap_hwmod_addr_space am33xx_eqep0_addr_space[] = { { - .pa_start = 0x48304000, - .pa_end = 0x48304000 + SZ_16 - 1, - .flags = ADDR_TYPE_RT - }, - { - .pa_start = 0x48304200, - .pa_end = 0x48304200 + SZ_256 - 1, - .flags = ADDR_TYPE_RT + .pa_start = 0x48300180, + .pa_end = 0x48300180 + SZ_128 - 1, }, { } }; -static struct omap_hwmod_ocp_if am33xx_l4_ls__ehrpwm2 = { - .master = &am33xx_l4_ls_hwmod, - .slave = &am33xx_ehrpwm2_hwmod, +static struct omap_hwmod_ocp_if am33xx_epwmss0__eqep0 = { + .master = &am33xx_epwmss0_hwmod, + .slave = &am33xx_eqep0_hwmod, .clk = "l4ls_gclk", - .addr = am33xx_ehrpwm2_addr_space, + .addr = am33xx_eqep0_addr_space, .user = OCP_USER_MPU, }; -/* - * Splitting the resources to handle access of PWMSS config space - * and module specific part independently - */ -static struct omap_hwmod_addr_space am33xx_ecap0_addr_space[] = { - { - .pa_start = 0x48300000, - .pa_end = 0x48300000 + SZ_16 - 1, - .flags = ADDR_TYPE_RT - }, +static struct omap_hwmod_addr_space am33xx_ehrpwm0_addr_space[] = { { - .pa_start = 0x48300100, - .pa_end = 0x48300100 + SZ_256 - 1, - .flags = ADDR_TYPE_RT + .pa_start = 0x48300200, + .pa_end = 0x48300200 + SZ_128 - 1, }, { } }; -static struct omap_hwmod_ocp_if am33xx_l4_ls__ecap0 = { - .master = &am33xx_l4_ls_hwmod, - .slave = &am33xx_ecap0_hwmod, +static struct omap_hwmod_ocp_if am33xx_epwmss0__ehrpwm0 = { + .master = &am33xx_epwmss0_hwmod, + .slave = &am33xx_ehrpwm0_hwmod, .clk = "l4ls_gclk", - .addr = am33xx_ecap0_addr_space, + .addr = am33xx_ehrpwm0_addr_space, .user = OCP_USER_MPU, }; -/* - * Splitting the resources to handle access of PWMSS config space - * and module specific part independently - */ -static struct omap_hwmod_addr_space am33xx_ecap1_addr_space[] = { + +static struct omap_hwmod_addr_space am33xx_epwmss1_addr_space[] = { { .pa_start = 0x48302000, .pa_end = 0x48302000 + SZ_16 - 1, .flags = ADDR_TYPE_RT }, + { } +}; + +static struct omap_hwmod_ocp_if am33xx_l4_ls__epwmss1 = { + .master = &am33xx_l4_ls_hwmod, + .slave = &am33xx_epwmss1_hwmod, + .clk = "l4ls_gclk", + .addr = am33xx_epwmss1_addr_space, + .user = OCP_USER_MPU, +}; + +static struct omap_hwmod_addr_space am33xx_ecap1_addr_space[] = { { .pa_start = 0x48302100, - .pa_end = 0x48302100 + SZ_256 - 1, - .flags = ADDR_TYPE_RT + .pa_end = 0x48302100 + SZ_128 - 1, }, { } }; -static struct omap_hwmod_ocp_if am33xx_l4_ls__ecap1 = { - .master = &am33xx_l4_ls_hwmod, +static struct omap_hwmod_ocp_if am33xx_epwmss1__ecap1 = { + .master = &am33xx_epwmss1_hwmod, .slave = &am33xx_ecap1_hwmod, .clk = "l4ls_gclk", .addr = am33xx_ecap1_addr_space, .user = OCP_USER_MPU, }; -/* - * Splitting the resources to handle access of PWMSS config space - * and module specific part independently - */ -static struct omap_hwmod_addr_space am33xx_ecap2_addr_space[] = { +static struct omap_hwmod_addr_space am33xx_eqep1_addr_space[] = { + { + .pa_start = 0x48302180, + .pa_end = 0x48302180 + SZ_128 - 1, + }, + { } +}; + +static struct omap_hwmod_ocp_if am33xx_epwmss1__eqep1 = { + .master = &am33xx_epwmss1_hwmod, + .slave = &am33xx_eqep1_hwmod, + .clk = "l4ls_gclk", + .addr = am33xx_eqep1_addr_space, + .user = OCP_USER_MPU, +}; + +static struct omap_hwmod_addr_space am33xx_ehrpwm1_addr_space[] = { + { + .pa_start = 0x48302200, + .pa_end = 0x48302200 + SZ_128 - 1, + }, + { } +}; + +static struct omap_hwmod_ocp_if am33xx_epwmss1__ehrpwm1 = { + .master = &am33xx_epwmss1_hwmod, + .slave = &am33xx_ehrpwm1_hwmod, + .clk = "l4ls_gclk", + .addr = am33xx_ehrpwm1_addr_space, + .user = OCP_USER_MPU, +}; + +static struct omap_hwmod_addr_space am33xx_epwmss2_addr_space[] = { { .pa_start = 0x48304000, .pa_end = 0x48304000 + SZ_16 - 1, .flags = ADDR_TYPE_RT }, + { } +}; + +static struct omap_hwmod_ocp_if am33xx_l4_ls__epwmss2 = { + .master = &am33xx_l4_ls_hwmod, + .slave = &am33xx_epwmss2_hwmod, + .clk = "l4ls_gclk", + .addr = am33xx_epwmss2_addr_space, + .user = OCP_USER_MPU, +}; + +static struct omap_hwmod_addr_space am33xx_ecap2_addr_space[] = { { .pa_start = 0x48304100, - .pa_end = 0x48304100 + SZ_256 - 1, - .flags = ADDR_TYPE_RT + .pa_end = 0x48304100 + SZ_128 - 1, }, { } }; -static struct omap_hwmod_ocp_if am33xx_l4_ls__ecap2 = { - .master = &am33xx_l4_ls_hwmod, +static struct omap_hwmod_ocp_if am33xx_epwmss2__ecap2 = { + .master = &am33xx_epwmss2_hwmod, .slave = &am33xx_ecap2_hwmod, .clk = "l4ls_gclk", .addr = am33xx_ecap2_addr_space, .user = OCP_USER_MPU, }; +static struct omap_hwmod_addr_space am33xx_eqep2_addr_space[] = { + { + .pa_start = 0x48304180, + .pa_end = 0x48304180 + SZ_128 - 1, + }, + { } +}; + +static struct omap_hwmod_ocp_if am33xx_epwmss2__eqep2 = { + .master = &am33xx_epwmss2_hwmod, + .slave = &am33xx_eqep2_hwmod, + .clk = "l4ls_gclk", + .addr = am33xx_eqep2_addr_space, + .user = OCP_USER_MPU, +}; + +static struct omap_hwmod_addr_space am33xx_ehrpwm2_addr_space[] = { + { + .pa_start = 0x48304200, + .pa_end = 0x48304200 + SZ_128 - 1, + }, + { } +}; + +static struct omap_hwmod_ocp_if am33xx_epwmss2__ehrpwm2 = { + .master = &am33xx_epwmss2_hwmod, + .slave = &am33xx_ehrpwm2_hwmod, + .clk = "l4ls_gclk", + .addr = am33xx_ehrpwm2_addr_space, + .user = OCP_USER_MPU, +}; + /* l3s cfg -> gpmc */ static struct omap_hwmod_addr_space am33xx_gpmc_addr_space[] = { { @@ -3328,6 +3427,13 @@ static struct omap_hwmod_ocp_if am33xx_l3_s__usbss = { .flags = OCPIF_SWSUP_IDLE, }; +/* l3 main -> ocmc */ +static struct omap_hwmod_ocp_if am33xx_l3_main__ocmc = { + .master = &am33xx_l3_main_hwmod, + .slave = &am33xx_ocmcram_hwmod, + .user = OCP_USER_MPU | OCP_USER_SDMA, +}; + static struct omap_hwmod_ocp_if *am33xx_hwmod_ocp_ifs[] __initdata = { &am33xx_l4_fw__emif_fw, &am33xx_l3_main__emif, @@ -3385,12 +3491,18 @@ static struct omap_hwmod_ocp_if *am33xx_hwmod_ocp_ifs[] __initdata = { &am33xx_l4_ls__uart6, &am33xx_l4_ls__spinlock, &am33xx_l4_ls__elm, - &am33xx_l4_ls__ehrpwm0, - &am33xx_l4_ls__ehrpwm1, - &am33xx_l4_ls__ehrpwm2, - &am33xx_l4_ls__ecap0, - &am33xx_l4_ls__ecap1, - &am33xx_l4_ls__ecap2, + &am33xx_l4_ls__epwmss0, + &am33xx_epwmss0__ecap0, + &am33xx_epwmss0__eqep0, + &am33xx_epwmss0__ehrpwm0, + &am33xx_l4_ls__epwmss1, + &am33xx_epwmss1__ecap1, + &am33xx_epwmss1__eqep1, + &am33xx_epwmss1__ehrpwm1, + &am33xx_l4_ls__epwmss2, + &am33xx_epwmss2__ecap2, + &am33xx_epwmss2__eqep2, + &am33xx_epwmss2__ehrpwm2, &am33xx_l3_s__gpmc, &am33xx_l3_main__lcdc, &am33xx_l4_ls__mcspi0, @@ -3398,6 +3510,7 @@ static struct omap_hwmod_ocp_if *am33xx_hwmod_ocp_ifs[] __initdata = { &am33xx_l3_main__tptc0, &am33xx_l3_main__tptc1, &am33xx_l3_main__tptc2, + &am33xx_l3_main__ocmc, &am33xx_l3_s__usbss, &am33xx_l4_hs__cpgmac0, &am33xx_cpgmac0__mdio, diff --git a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c index 8bb2628df34..ac7e03ec952 100644 --- a/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_3xxx_data.c @@ -3493,7 +3493,12 @@ static struct omap_hwmod am35xx_emac_hwmod = { .name = "davinci_emac", .mpu_irqs = am35xx_emac_mpu_irqs, .class = &am35xx_emac_class, - .flags = HWMOD_NO_IDLEST, + /* + * According to Mark Greer, the MPU will not return from WFI + * when the EMAC signals an interrupt. + * http://www.spinics.net/lists/arm-kernel/msg174734.html + */ + .flags = (HWMOD_NO_IDLEST | HWMOD_BLOCK_WFI), }; /* l3_core -> davinci emac interface */ diff --git a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c index 7ec1083ff60..0e47d2e1687 100644 --- a/arch/arm/mach-omap2/omap_hwmod_44xx_data.c +++ b/arch/arm/mach-omap2/omap_hwmod_44xx_data.c @@ -322,6 +322,7 @@ static struct omap_hwmod_class_sysconfig omap44xx_aess_sysc = { static struct omap_hwmod_class omap44xx_aess_hwmod_class = { .name = "aess", .sysc = &omap44xx_aess_sysc, + .enable_preprogram = omap_hwmod_aess_preprogram, }; /* aess */ @@ -348,7 +349,7 @@ static struct omap_hwmod omap44xx_aess_hwmod = { .clkdm_name = "abe_clkdm", .mpu_irqs = omap44xx_aess_irqs, .sdma_reqs = omap44xx_aess_sdma_reqs, - .main_clk = "aess_fck", + .main_clk = "aess_fclk", .prcm = { .omap4 = { .clkctrl_offs = OMAP4_CM1_ABE_AESS_CLKCTRL_OFFSET, @@ -4241,6 +4242,27 @@ static struct omap_hwmod_ocp_if omap44xx_l4_cfg__ocp_wp_noc = { static struct omap_hwmod_addr_space omap44xx_aess_addrs[] = { { + .name = "dmem", + .pa_start = 0x40180000, + .pa_end = 0x4018ffff + }, + { + .name = "cmem", + .pa_start = 0x401a0000, + .pa_end = 0x401a1fff + }, + { + .name = "smem", + .pa_start = 0x401c0000, + .pa_end = 0x401c5fff + }, + { + .name = "pmem", + .pa_start = 0x401e0000, + .pa_end = 0x401e1fff + }, + { + .name = "mpu", .pa_start = 0x401f1000, .pa_end = 0x401f13ff, .flags = ADDR_TYPE_RT @@ -4259,6 +4281,27 @@ static struct omap_hwmod_ocp_if __maybe_unused omap44xx_l4_abe__aess = { static struct omap_hwmod_addr_space omap44xx_aess_dma_addrs[] = { { + .name = "dmem_dma", + .pa_start = 0x49080000, + .pa_end = 0x4908ffff + }, + { + .name = "cmem_dma", + .pa_start = 0x490a0000, + .pa_end = 0x490a1fff + }, + { + .name = "smem_dma", + .pa_start = 0x490c0000, + .pa_end = 0x490c5fff + }, + { + .name = "pmem_dma", + .pa_start = 0x490e0000, + .pa_end = 0x490e1fff + }, + { + .name = "dma", .pa_start = 0x490f1000, .pa_end = 0x490f13ff, .flags = ADDR_TYPE_RT @@ -6268,7 +6311,7 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = { &omap44xx_l3_main_1__l3_main_3, &omap44xx_l3_main_2__l3_main_3, &omap44xx_l4_cfg__l3_main_3, - /* &omap44xx_aess__l4_abe, */ + &omap44xx_aess__l4_abe, &omap44xx_dsp__l4_abe, &omap44xx_l3_main_1__l4_abe, &omap44xx_mpu__l4_abe, @@ -6277,8 +6320,8 @@ static struct omap_hwmod_ocp_if *omap44xx_hwmod_ocp_ifs[] __initdata = { &omap44xx_l4_cfg__l4_wkup, &omap44xx_mpu__mpu_private, &omap44xx_l4_cfg__ocp_wp_noc, - /* &omap44xx_l4_abe__aess, */ - /* &omap44xx_l4_abe__aess_dma, */ + &omap44xx_l4_abe__aess, + &omap44xx_l4_abe__aess_dma, &omap44xx_l3_main_2__c2c, &omap44xx_l4_wkup__counter_32k, &omap44xx_l4_cfg__ctrl_module_core, diff --git a/arch/arm/mach-omap2/omap_hwmod_reset.c b/arch/arm/mach-omap2/omap_hwmod_reset.c new file mode 100644 index 00000000000..65e186c9df5 --- /dev/null +++ b/arch/arm/mach-omap2/omap_hwmod_reset.c @@ -0,0 +1,53 @@ +/* + * OMAP IP block custom reset and preprogramming stubs + * + * Copyright (C) 2012 Texas Instruments, Inc. + * Paul Walmsley + * + * A small number of IP blocks need custom reset and preprogramming + * functions. The stubs in this file provide a standard way for the + * hwmod code to call these functions, which are to be located under + * drivers/. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ +#include <linux/kernel.h> +#include <linux/errno.h> + +#include <sound/aess.h> + +#include "omap_hwmod.h" + +/** + * omap_hwmod_aess_preprogram - enable AESS internal autogating + * @oh: struct omap_hwmod * + * + * The AESS will not IdleAck to the PRCM until its internal autogating + * is enabled. Since internal autogating is disabled by default after + * AESS reset, we must enable autogating after the hwmod code resets + * the AESS. Returns 0. + */ +int omap_hwmod_aess_preprogram(struct omap_hwmod *oh) +{ + void __iomem *va; + + va = omap_hwmod_get_mpu_rt_va(oh); + if (!va) + return -EINVAL; + + aess_enable_autogating(va); + + return 0; +} diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index cd6682df562..673a4c1d1d7 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -282,19 +282,19 @@ int __init omap2_common_pm_late_init(void) * a completely different mechanism. * Disable this part if a DT blob is available. */ - if (of_have_populated_dt()) - return 0; + if (!of_have_populated_dt()) { - /* Init the voltage layer */ - omap_pmic_late_init(); - omap_voltage_late_init(); + /* Init the voltage layer */ + omap_pmic_late_init(); + omap_voltage_late_init(); - /* Initialize the voltages */ - omap3_init_voltages(); - omap4_init_voltages(); + /* Initialize the voltages */ + omap3_init_voltages(); + omap4_init_voltages(); - /* Smartreflex device init */ - omap_devinit_smartreflex(); + /* Smartreflex device init */ + omap_devinit_smartreflex(); + } #ifdef CONFIG_SUSPEND suspend_set_ops(&omap_pm_ops); diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c index b2a4df62354..b59d9390834 100644 --- a/arch/arm/mach-omap2/pm24xx.c +++ b/arch/arm/mach-omap2/pm24xx.c @@ -54,7 +54,6 @@ #include "powerdomain.h" #include "clockdomain.h" -static void (*omap2_sram_idle)(void); static void (*omap2_sram_suspend)(u32 dllctrl, void __iomem *sdrc_dlla_ctrl, void __iomem *sdrc_power); @@ -163,6 +162,8 @@ static int omap2_allow_mpu_retention(void) static void omap2_enter_mpu_retention(void) { + const int zero = 0; + /* The peripherals seem not to be able to wake up the MPU when * it is in retention mode. */ if (omap2_allow_mpu_retention()) { @@ -179,7 +180,8 @@ static void omap2_enter_mpu_retention(void) pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_ON); } - omap2_sram_idle(); + /* WFI */ + asm("mcr p15, 0, %0, c7, c0, 4" : : "r" (zero) : "memory", "cc"); pwrdm_set_next_pwrst(mpu_pwrdm, PWRDM_POWER_ON); } @@ -333,11 +335,9 @@ int __init omap2_pm_init(void) /* * We copy the assembler sleep/wakeup routines to SRAM. * These routines need to be in SRAM as that's the only - * memory the MPU can see when it wakes up. + * memory the MPU can see when it wakes up after the entire + * chip enters idle. */ - omap2_sram_idle = omap_sram_push(omap24xx_idle_loop_suspend, - omap24xx_idle_loop_suspend_sz); - omap2_sram_suspend = omap_sram_push(omap24xx_cpu_suspend, omap24xx_cpu_suspend_sz); diff --git a/arch/arm/mach-omap2/pm44xx.c b/arch/arm/mach-omap2/pm44xx.c index aa6fd98f606..ea62e75ef21 100644 --- a/arch/arm/mach-omap2/pm44xx.c +++ b/arch/arm/mach-omap2/pm44xx.c @@ -77,10 +77,20 @@ static int omap4_pm_suspend(void) omap_set_pwrdm_state(pwrst->pwrdm, pwrst->saved_state); pwrdm_set_logic_retst(pwrst->pwrdm, pwrst->saved_logic_state); } - if (ret) + if (ret) { pr_crit("Could not enter target state in pm_suspend\n"); - else + /* + * OMAP4 chip PM currently works only with certain (newer) + * versions of bootloaders. This is due to missing code in the + * kernel to properly reset and initialize some devices. + * Warn the user about the bootloader version being one of the + * possible causes. + * http://www.spinics.net/lists/arm-kernel/msg218641.html + */ + pr_warn("A possible cause could be an old bootloader - try u-boot >= v2012.07\n"); + } else { pr_info("Successfully put all powerdomains to target state\n"); + } return 0; } @@ -146,6 +156,13 @@ int __init omap4_pm_init(void) } pr_err("Power Management for TI OMAP4.\n"); + /* + * OMAP4 chip PM currently works only with certain (newer) + * versions of bootloaders. This is due to missing code in the + * kernel to properly reset and initialize some devices. + * http://www.spinics.net/lists/arm-kernel/msg218641.html + */ + pr_warn("OMAP4 PM: u-boot >= v2012.07 is required for full PM support\n"); ret = pwrdm_for_each(pwrdms_setup, NULL); if (ret) { diff --git a/arch/arm/mach-omap2/prm33xx.c b/arch/arm/mach-omap2/prm33xx.c index 1ac73883f89..44c0d7216aa 100644 --- a/arch/arm/mach-omap2/prm33xx.c +++ b/arch/arm/mach-omap2/prm33xx.c @@ -110,11 +110,11 @@ int am33xx_prm_assert_hardreset(u8 shift, s16 inst, u16 rstctrl_offs) * -EINVAL upon an argument error, -EEXIST if the submodule was already out * of reset, or -EBUSY if the submodule did not exit reset promptly. */ -int am33xx_prm_deassert_hardreset(u8 shift, s16 inst, +int am33xx_prm_deassert_hardreset(u8 shift, u8 st_shift, s16 inst, u16 rstctrl_offs, u16 rstst_offs) { int c; - u32 mask = 1 << shift; + u32 mask = 1 << st_shift; /* Check the current status to avoid de-asserting the line twice */ if (am33xx_prm_is_hardreset_asserted(shift, inst, rstctrl_offs) == 0) @@ -122,11 +122,14 @@ int am33xx_prm_deassert_hardreset(u8 shift, s16 inst, /* Clear the reset status by writing 1 to the status bit */ am33xx_prm_rmw_reg_bits(0xffffffff, mask, inst, rstst_offs); + /* de-assert the reset control line */ + mask = 1 << shift; + am33xx_prm_rmw_reg_bits(mask, 0, inst, rstctrl_offs); - /* wait the status to be set */ - omap_test_timeout(am33xx_prm_is_hardreset_asserted(shift, inst, + /* wait the status to be set */ + omap_test_timeout(am33xx_prm_is_hardreset_asserted(st_shift, inst, rstst_offs), MAX_MODULE_HARDRESET_WAIT, c); diff --git a/arch/arm/mach-omap2/prm33xx.h b/arch/arm/mach-omap2/prm33xx.h index 3f25c563a82..9b9918dfb11 100644 --- a/arch/arm/mach-omap2/prm33xx.h +++ b/arch/arm/mach-omap2/prm33xx.h @@ -117,6 +117,7 @@ #define AM33XX_PM_CEFUSE_PWRSTST_OFFSET 0x0004 #define AM33XX_PM_CEFUSE_PWRSTST AM33XX_PRM_REGADDR(AM33XX_PRM_CEFUSE_MOD, 0x0004) +#ifndef __ASSEMBLER__ extern u32 am33xx_prm_read_reg(s16 inst, u16 idx); extern void am33xx_prm_write_reg(u32 val, s16 inst, u16 idx); extern u32 am33xx_prm_rmw_reg_bits(u32 mask, u32 bits, s16 inst, s16 idx); @@ -124,6 +125,7 @@ extern void am33xx_prm_global_warm_sw_reset(void); extern int am33xx_prm_is_hardreset_asserted(u8 shift, s16 inst, u16 rstctrl_offs); extern int am33xx_prm_assert_hardreset(u8 shift, s16 inst, u16 rstctrl_offs); -extern int am33xx_prm_deassert_hardreset(u8 shift, s16 inst, +extern int am33xx_prm_deassert_hardreset(u8 shift, u8 st_shift, s16 inst, u16 rstctrl_offs, u16 rstst_offs); +#endif /* ASSEMBLER */ #endif diff --git a/arch/arm/mach-omap2/sleep24xx.S b/arch/arm/mach-omap2/sleep24xx.S index ce0ccd26efb..1d3cb25c962 100644 --- a/arch/arm/mach-omap2/sleep24xx.S +++ b/arch/arm/mach-omap2/sleep24xx.S @@ -37,25 +37,6 @@ .text /* - * Forces OMAP into idle state - * - * omap24xx_idle_loop_suspend() - This bit of code just executes the WFI - * for normal idles. - * - * Note: This code get's copied to internal SRAM at boot. When the OMAP - * wakes up it continues execution at the point it went to sleep. - */ - .align 3 -ENTRY(omap24xx_idle_loop_suspend) - stmfd sp!, {r0, lr} @ save registers on stack - mov r0, #0 @ clear for mcr setup - mcr p15, 0, r0, c7, c0, 4 @ wait for interrupt - ldmfd sp!, {r0, pc} @ restore regs and return - -ENTRY(omap24xx_idle_loop_suspend_sz) - .word . - omap24xx_idle_loop_suspend - -/* * omap24xx_cpu_suspend() - Forces OMAP into deep sleep state by completing * SDRC shutdown then ARM shutdown. Upon wake MPU is back on so just restore * SDRC. diff --git a/arch/arm/mach-omap2/soc.h b/arch/arm/mach-omap2/soc.h index 092aedd7ed1..c62116bbc76 100644 --- a/arch/arm/mach-omap2/soc.h +++ b/arch/arm/mach-omap2/soc.h @@ -395,6 +395,7 @@ IS_OMAP_TYPE(3430, 0x3430) #define AM335X_CLASS 0x33500033 #define AM335X_REV_ES1_0 AM335X_CLASS +#define AM335X_REV_ES2_0 (AM335X_CLASS | (0x1 << 8)) #define OMAP443X_CLASS 0x44300044 #define OMAP4430_REV_ES1_0 (OMAP443X_CLASS | (0x10 << 8)) diff --git a/arch/arm/mach-omap2/sr_device.c b/arch/arm/mach-omap2/sr_device.c index bb829e06540..d7bc33f1534 100644 --- a/arch/arm/mach-omap2/sr_device.c +++ b/arch/arm/mach-omap2/sr_device.c @@ -152,7 +152,7 @@ static int __init sr_dev_init(struct omap_hwmod *oh, void *user) sr_data->enable_on_init = sr_enable_on_init; - pdev = omap_device_build(name, i, oh, sr_data, sizeof(*sr_data), 0); + pdev = omap_device_build(name, i, oh, sr_data, sizeof(*sr_data)); if (IS_ERR(pdev)) pr_warning("%s: Could not build omap_device for %s: %s.\n\n", __func__, name, oh->name); diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig index 67c859cf16b..ce66eb9be48 100644 --- a/arch/arm/plat-omap/Kconfig +++ b/arch/arm/plat-omap/Kconfig @@ -147,15 +147,6 @@ config OMAP3_L2_AUX_SECURE_SERVICE_SET_ID help PPA routine service ID for setting L2 auxiliary control register. -config OMAP_32K_TIMER_HZ - int "Kernel internal timer frequency for 32KHz timer" - range 32 1024 - depends on OMAP_32K_TIMER - default "128" - help - Kernel internal timer frequency should be a divisor of 32768, - such as 64 or 128. - config OMAP_DM_TIMER bool "Use dual-mode timer" depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS diff --git a/arch/arm/plat-omap/include/plat/timex.h b/arch/arm/plat-omap/include/plat/timex.h index 6d35767bc48..e27d2daa779 100644 --- a/arch/arm/plat-omap/include/plat/timex.h +++ b/arch/arm/plat-omap/include/plat/timex.h @@ -28,14 +28,6 @@ #if !defined(__ASM_ARCH_OMAP_TIMEX_H) #define __ASM_ARCH_OMAP_TIMEX_H -/* - * OMAP 32KHz timer updates time one jiffie at a time from a secondary timer, - * and that's why the CLOCK_TICK_RATE is not 32768. - */ -#ifdef CONFIG_OMAP_32K_TIMER -#define CLOCK_TICK_RATE (CONFIG_OMAP_32K_TIMER_HZ) -#else #define CLOCK_TICK_RATE (HZ * 100000UL) -#endif #endif /* __ASM_ARCH_OMAP_TIMEX_H */ diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index f84f5c57de3..60308053fdb 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -509,3 +509,4 @@ void local_touch_nmi(void) { __this_cpu_write(last_nmi_rip, 0); } +EXPORT_SYMBOL_GPL(local_touch_nmi); diff --git a/block/Kconfig b/block/Kconfig index 4a85ccf8d4c..a7e40a7c821 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -4,7 +4,6 @@ menuconfig BLOCK bool "Enable the block layer" if EXPERT default y - select PERCPU_RWSEM help Provide block layer support for the kernel. diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 8bdebb6781e..b2b9837f9dd 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -26,11 +26,32 @@ static DEFINE_MUTEX(blkcg_pol_mutex); -struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT }; +struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, + .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, }; EXPORT_SYMBOL_GPL(blkcg_root); static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; +static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, + struct request_queue *q, bool update_hint); + +/** + * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants + * @d_blkg: loop cursor pointing to the current descendant + * @pos_cgrp: used for iteration + * @p_blkg: target blkg to walk descendants of + * + * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU + * read locked. If called under either blkcg or queue lock, the iteration + * is guaranteed to include all and only online blkgs. The caller may + * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip + * subtree. + */ +#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg) \ + cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \ + if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \ + (p_blkg)->q, false))) + static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { @@ -112,9 +133,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, blkg->pd[i] = pd; pd->blkg = blkg; + pd->plid = i; /* invoke per-policy init */ - if (blkcg_policy_enabled(blkg->q, pol)) + if (pol->pd_init_fn) pol->pd_init_fn(blkg); } @@ -125,8 +147,19 @@ err_free: return NULL; } +/** + * __blkg_lookup - internal version of blkg_lookup() + * @blkcg: blkcg of interest + * @q: request_queue of interest + * @update_hint: whether to update lookup hint with the result or not + * + * This is internal version and shouldn't be used by policy + * implementations. Looks up blkgs for the @blkcg - @q pair regardless of + * @q's bypass state. If @update_hint is %true, the caller should be + * holding @q->queue_lock and lookup hint is updated on success. + */ static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, - struct request_queue *q) + struct request_queue *q, bool update_hint) { struct blkcg_gq *blkg; @@ -135,14 +168,19 @@ static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, return blkg; /* - * Hint didn't match. Look up from the radix tree. Note that we - * may not be holding queue_lock and thus are not sure whether - * @blkg from blkg_tree has already been removed or not, so we - * can't update hint to the lookup result. Leave it to the caller. + * Hint didn't match. Look up from the radix tree. Note that the + * hint can only be updated under queue_lock as otherwise @blkg + * could have already been removed from blkg_tree. The caller is + * responsible for grabbing queue_lock if @update_hint. */ blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); - if (blkg && blkg->q == q) + if (blkg && blkg->q == q) { + if (update_hint) { + lockdep_assert_held(q->queue_lock); + rcu_assign_pointer(blkcg->blkg_hint, blkg); + } return blkg; + } return NULL; } @@ -162,7 +200,7 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) if (unlikely(blk_queue_bypass(q))) return NULL; - return __blkg_lookup(blkcg, q); + return __blkg_lookup(blkcg, q, false); } EXPORT_SYMBOL_GPL(blkg_lookup); @@ -170,75 +208,129 @@ EXPORT_SYMBOL_GPL(blkg_lookup); * If @new_blkg is %NULL, this function tries to allocate a new one as * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return. */ -static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q, - struct blkcg_gq *new_blkg) +static struct blkcg_gq *blkg_create(struct blkcg *blkcg, + struct request_queue *q, + struct blkcg_gq *new_blkg) { struct blkcg_gq *blkg; - int ret; + int i, ret; WARN_ON_ONCE(!rcu_read_lock_held()); lockdep_assert_held(q->queue_lock); - /* lookup and update hint on success, see __blkg_lookup() for details */ - blkg = __blkg_lookup(blkcg, q); - if (blkg) { - rcu_assign_pointer(blkcg->blkg_hint, blkg); - goto out_free; - } - /* blkg holds a reference to blkcg */ if (!css_tryget(&blkcg->css)) { - blkg = ERR_PTR(-EINVAL); - goto out_free; + ret = -EINVAL; + goto err_free_blkg; } /* allocate */ if (!new_blkg) { new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); if (unlikely(!new_blkg)) { - blkg = ERR_PTR(-ENOMEM); - goto out_put; + ret = -ENOMEM; + goto err_put_css; } } blkg = new_blkg; - /* insert */ + /* link parent and insert */ + if (blkcg_parent(blkcg)) { + blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); + if (WARN_ON_ONCE(!blkg->parent)) { + blkg = ERR_PTR(-EINVAL); + goto err_put_css; + } + blkg_get(blkg->parent); + } + spin_lock(&blkcg->lock); ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); if (likely(!ret)) { hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); list_add(&blkg->q_node, &q->blkg_list); + + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_online_fn) + pol->pd_online_fn(blkg); + } } + blkg->online = true; spin_unlock(&blkcg->lock); if (!ret) return blkg; - blkg = ERR_PTR(ret); -out_put: + /* @blkg failed fully initialized, use the usual release path */ + blkg_put(blkg); + return ERR_PTR(ret); + +err_put_css: css_put(&blkcg->css); -out_free: +err_free_blkg: blkg_free(new_blkg); - return blkg; + return ERR_PTR(ret); } +/** + * blkg_lookup_create - lookup blkg, try to create one if not there + * @blkcg: blkcg of interest + * @q: request_queue of interest + * + * Lookup blkg for the @blkcg - @q pair. If it doesn't exist, try to + * create one. blkg creation is performed recursively from blkcg_root such + * that all non-root blkg's have access to the parent blkg. This function + * should be called under RCU read lock and @q->queue_lock. + * + * Returns pointer to the looked up or created blkg on success, ERR_PTR() + * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not + * dead and bypassing, returns ERR_PTR(-EBUSY). + */ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q) { + struct blkcg_gq *blkg; + + WARN_ON_ONCE(!rcu_read_lock_held()); + lockdep_assert_held(q->queue_lock); + /* * This could be the first entry point of blkcg implementation and * we shouldn't allow anything to go through for a bypassing queue. */ if (unlikely(blk_queue_bypass(q))) return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY); - return __blkg_lookup_create(blkcg, q, NULL); + + blkg = __blkg_lookup(blkcg, q, true); + if (blkg) + return blkg; + + /* + * Create blkgs walking down from blkcg_root to @blkcg, so that all + * non-root blkgs have access to their parents. + */ + while (true) { + struct blkcg *pos = blkcg; + struct blkcg *parent = blkcg_parent(blkcg); + + while (parent && !__blkg_lookup(parent, q, false)) { + pos = parent; + parent = blkcg_parent(parent); + } + + blkg = blkg_create(pos, q, NULL); + if (pos == blkcg || IS_ERR(blkg)) + return blkg; + } } EXPORT_SYMBOL_GPL(blkg_lookup_create); static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; + int i; lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); @@ -247,6 +339,14 @@ static void blkg_destroy(struct blkcg_gq *blkg) WARN_ON_ONCE(list_empty(&blkg->q_node)); WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_offline_fn) + pol->pd_offline_fn(blkg); + } + blkg->online = false; + radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); list_del_init(&blkg->q_node); hlist_del_init_rcu(&blkg->blkcg_node); @@ -301,8 +401,10 @@ static void blkg_rcu_free(struct rcu_head *rcu_head) void __blkg_release(struct blkcg_gq *blkg) { - /* release the extra blkcg reference this blkg has been holding */ + /* release the blkcg and parent blkg refs this blkg has been holding */ css_put(&blkg->blkcg->css); + if (blkg->parent) + blkg_put(blkg->parent); /* * A group is freed in rcu manner. But having an rcu lock does not @@ -401,8 +503,9 @@ static const char *blkg_dev_name(struct blkcg_gq *blkg) * * This function invokes @prfill on each blkg of @blkcg if pd for the * policy specified by @pol exists. @prfill is invoked with @sf, the - * policy data and @data. If @show_total is %true, the sum of the return - * values from @prfill is printed with "Total" label at the end. + * policy data and @data and the matching queue lock held. If @show_total + * is %true, the sum of the return values from @prfill is printed with + * "Total" label at the end. * * This is to be used to construct print functions for * cftype->read_seq_string method. @@ -416,11 +519,14 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, struct blkcg_gq *blkg; u64 total = 0; - spin_lock_irq(&blkcg->lock); - hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) + rcu_read_lock(); + hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { + spin_lock_irq(blkg->q->queue_lock); if (blkcg_policy_enabled(blkg->q, pol)) total += prfill(sf, blkg->pd[pol->plid], data); - spin_unlock_irq(&blkcg->lock); + spin_unlock_irq(blkg->q->queue_lock); + } + rcu_read_unlock(); if (show_total) seq_printf(sf, "Total %llu\n", (unsigned long long)total); @@ -479,6 +585,7 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v); return v; } +EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); /** * blkg_prfill_stat - prfill callback for blkg_stat @@ -512,6 +619,82 @@ u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); /** + * blkg_stat_recursive_sum - collect hierarchical blkg_stat + * @pd: policy private data of interest + * @off: offset to the blkg_stat in @pd + * + * Collect the blkg_stat specified by @off from @pd and all its online + * descendants and return the sum. The caller must be holding the queue + * lock for online tests. + */ +u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off) +{ + struct blkcg_policy *pol = blkcg_policy[pd->plid]; + struct blkcg_gq *pos_blkg; + struct cgroup *pos_cgrp; + u64 sum; + + lockdep_assert_held(pd->blkg->q->queue_lock); + + sum = blkg_stat_read((void *)pd + off); + + rcu_read_lock(); + blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { + struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); + struct blkg_stat *stat = (void *)pos_pd + off; + + if (pos_blkg->online) + sum += blkg_stat_read(stat); + } + rcu_read_unlock(); + + return sum; +} +EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); + +/** + * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat + * @pd: policy private data of interest + * @off: offset to the blkg_stat in @pd + * + * Collect the blkg_rwstat specified by @off from @pd and all its online + * descendants and return the sum. The caller must be holding the queue + * lock for online tests. + */ +struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, + int off) +{ + struct blkcg_policy *pol = blkcg_policy[pd->plid]; + struct blkcg_gq *pos_blkg; + struct cgroup *pos_cgrp; + struct blkg_rwstat sum; + int i; + + lockdep_assert_held(pd->blkg->q->queue_lock); + + sum = blkg_rwstat_read((void *)pd + off); + + rcu_read_lock(); + blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) { + struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); + struct blkg_rwstat *rwstat = (void *)pos_pd + off; + struct blkg_rwstat tmp; + + if (!pos_blkg->online) + continue; + + tmp = blkg_rwstat_read(rwstat); + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + sum.cnt[i] += tmp.cnt[i]; + } + rcu_read_unlock(); + + return sum; +} +EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); + +/** * blkg_conf_prep - parse and prepare for per-blkg config update * @blkcg: target block cgroup * @pol: target policy @@ -656,6 +839,7 @@ static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup) return ERR_PTR(-ENOMEM); blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; + blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ done: spin_lock_init(&blkcg->lock); @@ -775,7 +959,7 @@ int blkcg_activate_policy(struct request_queue *q, const struct blkcg_policy *pol) { LIST_HEAD(pds); - struct blkcg_gq *blkg; + struct blkcg_gq *blkg, *new_blkg; struct blkg_policy_data *pd, *n; int cnt = 0, ret; bool preloaded; @@ -784,19 +968,27 @@ int blkcg_activate_policy(struct request_queue *q, return 0; /* preallocations for root blkg */ - blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); - if (!blkg) + new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); + if (!new_blkg) return -ENOMEM; preloaded = !radix_tree_preload(GFP_KERNEL); blk_queue_bypass_start(q); - /* make sure the root blkg exists and count the existing blkgs */ + /* + * Make sure the root blkg exists and count the existing blkgs. As + * @q is bypassing at this point, blkg_lookup_create() can't be + * used. Open code it. + */ spin_lock_irq(q->queue_lock); rcu_read_lock(); - blkg = __blkg_lookup_create(&blkcg_root, q, blkg); + blkg = __blkg_lookup(&blkcg_root, q, false); + if (blkg) + blkg_free(new_blkg); + else + blkg = blkg_create(&blkcg_root, q, new_blkg); rcu_read_unlock(); if (preloaded) @@ -844,6 +1036,7 @@ int blkcg_activate_policy(struct request_queue *q, blkg->pd[pol->plid] = pd; pd->blkg = blkg; + pd->plid = pol->plid; pol->pd_init_fn(blkg); spin_unlock(&blkg->blkcg->lock); @@ -890,6 +1083,8 @@ void blkcg_deactivate_policy(struct request_queue *q, /* grab blkcg lock too while removing @pd from @blkg */ spin_lock(&blkg->blkcg->lock); + if (pol->pd_offline_fn) + pol->pd_offline_fn(blkg); if (pol->pd_exit_fn) pol->pd_exit_fn(blkg); diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 24597309e23..f2b292925cc 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -54,6 +54,7 @@ struct blkcg { /* TODO: per-policy storage in blkcg */ unsigned int cfq_weight; /* belongs to cfq */ + unsigned int cfq_leaf_weight; }; struct blkg_stat { @@ -80,8 +81,9 @@ struct blkg_rwstat { * beginning and pd_size can't be smaller than pd. */ struct blkg_policy_data { - /* the blkg this per-policy data belongs to */ + /* the blkg and policy id this per-policy data belongs to */ struct blkcg_gq *blkg; + int plid; /* used during policy activation */ struct list_head alloc_node; @@ -94,17 +96,27 @@ struct blkcg_gq { struct list_head q_node; struct hlist_node blkcg_node; struct blkcg *blkcg; + + /* all non-root blkcg_gq's are guaranteed to have access to parent */ + struct blkcg_gq *parent; + /* request allocation list for this blkcg-q pair */ struct request_list rl; + /* reference count */ int refcnt; + /* is this blkg online? protected by both blkcg and q locks */ + bool online; + struct blkg_policy_data *pd[BLKCG_MAX_POLS]; struct rcu_head rcu_head; }; typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); +typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); @@ -117,6 +129,8 @@ struct blkcg_policy { /* operations */ blkcg_pol_init_pd_fn *pd_init_fn; + blkcg_pol_online_pd_fn *pd_online_fn; + blkcg_pol_offline_pd_fn *pd_offline_fn; blkcg_pol_exit_pd_fn *pd_exit_fn; blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; }; @@ -150,6 +164,10 @@ u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off); +u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); +struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, + int off); + struct blkg_conf_ctx { struct gendisk *disk; struct blkcg_gq *blkg; @@ -181,6 +199,19 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) } /** + * blkcg_parent - get the parent of a blkcg + * @blkcg: blkcg of interest + * + * Return the parent blkcg of @blkcg. Can be called anytime. + */ +static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) +{ + struct cgroup *pcg = blkcg->css.cgroup->parent; + + return pcg ? cgroup_to_blkcg(pcg) : NULL; +} + +/** * blkg_to_pdata - get policy private data * @blkg: blkg of interest * @pol: policy of interest @@ -387,6 +418,18 @@ static inline void blkg_stat_reset(struct blkg_stat *stat) } /** + * blkg_stat_merge - merge a blkg_stat into another + * @to: the destination blkg_stat + * @from: the source + * + * Add @from's count to @to. + */ +static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) +{ + blkg_stat_add(to, blkg_stat_read(from)); +} + +/** * blkg_rwstat_add - add a value to a blkg_rwstat * @rwstat: target blkg_rwstat * @rw: mask of REQ_{WRITE|SYNC} @@ -434,14 +477,14 @@ static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) } /** - * blkg_rwstat_sum - read the total count of a blkg_rwstat + * blkg_rwstat_total - read the total count of a blkg_rwstat * @rwstat: blkg_rwstat to read * * Return the total count of @rwstat regardless of the IO direction. This * function can be called without synchronization and takes care of u64 * atomicity. */ -static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat) +static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) { struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); @@ -457,6 +500,25 @@ static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); } +/** + * blkg_rwstat_merge - merge a blkg_rwstat into another + * @to: the destination blkg_rwstat + * @from: the source + * + * Add @from's counts to @to. + */ +static inline void blkg_rwstat_merge(struct blkg_rwstat *to, + struct blkg_rwstat *from) +{ + struct blkg_rwstat v = blkg_rwstat_read(from); + int i; + + u64_stats_update_begin(&to->syncp); + for (i = 0; i < BLKG_RWSTAT_NR; i++) + to->cnt[i] += v.cnt[i]; + u64_stats_update_end(&to->syncp); +} + #else /* CONFIG_BLK_CGROUP */ struct cgroup; diff --git a/block/blk-core.c b/block/blk-core.c index 277134cb5d3..074b758efc4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -39,7 +39,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); -EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); DEFINE_IDA(blk_queue_ida); @@ -1348,7 +1347,7 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, if (!ll_back_merge_fn(q, req, bio)) return false; - trace_block_bio_backmerge(q, bio); + trace_block_bio_backmerge(q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) blk_rq_set_mixed_merge(req); @@ -1370,7 +1369,7 @@ static bool bio_attempt_front_merge(struct request_queue *q, if (!ll_front_merge_fn(q, req, bio)) return false; - trace_block_bio_frontmerge(q, bio); + trace_block_bio_frontmerge(q, req, bio); if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) blk_rq_set_mixed_merge(req); @@ -1553,13 +1552,6 @@ get_rq: if (list_empty(&plug->list)) trace_block_plug(q); else { - if (!plug->should_sort) { - struct request *__rq; - - __rq = list_entry_rq(plug->list.prev); - if (__rq->q != q) - plug->should_sort = 1; - } if (request_count >= BLK_MAX_REQUEST_COUNT) { blk_flush_plug_list(plug, false); trace_block_plug(q); @@ -2890,7 +2882,6 @@ void blk_start_plug(struct blk_plug *plug) plug->magic = PLUG_MAGIC; INIT_LIST_HEAD(&plug->list); INIT_LIST_HEAD(&plug->cb_list); - plug->should_sort = 0; /* * If this is a nested plug, don't actually assign it. It will be @@ -2992,10 +2983,7 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) list_splice_init(&plug->list, &list); - if (plug->should_sort) { - list_sort(NULL, &list, plug_rq_cmp); - plug->should_sort = 0; - } + list_sort(NULL, &list, plug_rq_cmp); q = NULL; depth = 0; diff --git a/block/blk-exec.c b/block/blk-exec.c index c88202f973d..e7062139612 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -121,9 +121,9 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, /* Prevent hang_check timer from firing at us during very long I/O */ hang_check = sysctl_hung_task_timeout_secs; if (hang_check) - while (!wait_for_completion_timeout(&wait, hang_check * (HZ/2))); + while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2))); else - wait_for_completion(&wait); + wait_for_completion_io(&wait); if (rq->errors) err = -EIO; diff --git a/block/blk-flush.c b/block/blk-flush.c index 720ad607ff9..db8f1b50785 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -436,7 +436,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, bio_get(bio); submit_bio(WRITE_FLUSH, bio); - wait_for_completion(&wait); + wait_for_completion_io(&wait); /* * The driver must store the error location in ->bi_sector, if diff --git a/block/blk-lib.c b/block/blk-lib.c index b3a1f2b70b3..d6f50d57256 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -126,7 +126,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, /* Wait for bios in-flight */ if (!atomic_dec_and_test(&bb.done)) - wait_for_completion(&wait); + wait_for_completion_io(&wait); if (!test_bit(BIO_UPTODATE, &bb.flags)) ret = -EIO; @@ -200,7 +200,7 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, /* Wait for bios in-flight */ if (!atomic_dec_and_test(&bb.done)) - wait_for_completion(&wait); + wait_for_completion_io(&wait); if (!test_bit(BIO_UPTODATE, &bb.flags)) ret = -ENOTSUPP; @@ -262,7 +262,7 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, /* Wait for bios in-flight */ if (!atomic_dec_and_test(&bb.done)) - wait_for_completion(&wait); + wait_for_completion_io(&wait); if (!test_bit(BIO_UPTODATE, &bb.flags)) /* One of bios in the batch was completed with error.*/ diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 788147797a7..6206a934eb8 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -497,6 +497,13 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, return res; } +static void blk_free_queue_rcu(struct rcu_head *rcu_head) +{ + struct request_queue *q = container_of(rcu_head, struct request_queue, + rcu_head); + kmem_cache_free(blk_requestq_cachep, q); +} + /** * blk_release_queue: - release a &struct request_queue when it is no longer needed * @kobj: the kobj belonging to the request queue to be released @@ -538,7 +545,7 @@ static void blk_release_queue(struct kobject *kobj) bdi_destroy(&q->backing_dev_info); ida_simple_remove(&blk_queue_ida, q->id); - kmem_cache_free(blk_requestq_cachep, q); + call_rcu(&q->rcu_head, blk_free_queue_rcu); } static const struct sysfs_ops queue_sysfs_ops = { diff --git a/block/blk.h b/block/blk.h index 47fdfdd4152..e837b8f619b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -61,7 +61,7 @@ static inline void blk_clear_rq_complete(struct request *rq) /* * Internal elevator interface */ -#define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) +#define ELV_ON_HASH(rq) hash_hashed(&(rq)->hash) void blk_insert_flush(struct request *rq); void blk_abort_flushes(struct request_queue *q); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ec52807cdd0..4f0ade74cfd 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -85,7 +85,6 @@ struct cfq_rb_root { struct rb_root rb; struct rb_node *left; unsigned count; - unsigned total_weight; u64 min_vdisktime; struct cfq_ttime ttime; }; @@ -155,7 +154,7 @@ struct cfq_queue { * First index in the service_trees. * IDLE is handled separately, so it has negative index */ -enum wl_prio_t { +enum wl_class_t { BE_WORKLOAD = 0, RT_WORKLOAD = 1, IDLE_WORKLOAD = 2, @@ -223,10 +222,45 @@ struct cfq_group { /* group service_tree key */ u64 vdisktime; + + /* + * The number of active cfqgs and sum of their weights under this + * cfqg. This covers this cfqg's leaf_weight and all children's + * weights, but does not cover weights of further descendants. + * + * If a cfqg is on the service tree, it's active. An active cfqg + * also activates its parent and contributes to the children_weight + * of the parent. + */ + int nr_active; + unsigned int children_weight; + + /* + * vfraction is the fraction of vdisktime that the tasks in this + * cfqg are entitled to. This is determined by compounding the + * ratios walking up from this cfqg to the root. + * + * It is in fixed point w/ CFQ_SERVICE_SHIFT and the sum of all + * vfractions on a service tree is approximately 1. The sum may + * deviate a bit due to rounding errors and fluctuations caused by + * cfqgs entering and leaving the service tree. + */ + unsigned int vfraction; + + /* + * There are two weights - (internal) weight is the weight of this + * cfqg against the sibling cfqgs. leaf_weight is the wight of + * this cfqg against the child cfqgs. For the root cfqg, both + * weights are kept in sync for backward compatibility. + */ unsigned int weight; unsigned int new_weight; unsigned int dev_weight; + unsigned int leaf_weight; + unsigned int new_leaf_weight; + unsigned int dev_leaf_weight; + /* number of cfqq currently on this group */ int nr_cfqq; @@ -248,14 +282,15 @@ struct cfq_group { struct cfq_rb_root service_trees[2][3]; struct cfq_rb_root service_tree_idle; - unsigned long saved_workload_slice; - enum wl_type_t saved_workload; - enum wl_prio_t saved_serving_prio; + unsigned long saved_wl_slice; + enum wl_type_t saved_wl_type; + enum wl_class_t saved_wl_class; /* number of requests that are on the dispatch list or inside driver */ int dispatched; struct cfq_ttime ttime; - struct cfqg_stats stats; + struct cfqg_stats stats; /* stats for this cfqg */ + struct cfqg_stats dead_stats; /* stats pushed from dead children */ }; struct cfq_io_cq { @@ -280,8 +315,8 @@ struct cfq_data { /* * The priority currently being served */ - enum wl_prio_t serving_prio; - enum wl_type_t serving_type; + enum wl_class_t serving_wl_class; + enum wl_type_t serving_wl_type; unsigned long workload_expires; struct cfq_group *serving_group; @@ -353,17 +388,17 @@ struct cfq_data { static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd); -static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg, - enum wl_prio_t prio, +static struct cfq_rb_root *st_for(struct cfq_group *cfqg, + enum wl_class_t class, enum wl_type_t type) { if (!cfqg) return NULL; - if (prio == IDLE_WORKLOAD) + if (class == IDLE_WORKLOAD) return &cfqg->service_tree_idle; - return &cfqg->service_trees[prio][type]; + return &cfqg->service_trees[class][type]; } enum cfqq_state_flags { @@ -502,7 +537,7 @@ static void cfqg_stats_set_start_empty_time(struct cfq_group *cfqg) { struct cfqg_stats *stats = &cfqg->stats; - if (blkg_rwstat_sum(&stats->queued)) + if (blkg_rwstat_total(&stats->queued)) return; /* @@ -546,7 +581,7 @@ static void cfqg_stats_update_avg_queue_size(struct cfq_group *cfqg) struct cfqg_stats *stats = &cfqg->stats; blkg_stat_add(&stats->avg_queue_size_sum, - blkg_rwstat_sum(&stats->queued)); + blkg_rwstat_total(&stats->queued)); blkg_stat_add(&stats->avg_queue_size_samples, 1); cfqg_stats_update_group_wait_time(stats); } @@ -572,6 +607,13 @@ static inline struct cfq_group *blkg_to_cfqg(struct blkcg_gq *blkg) return pd_to_cfqg(blkg_to_pd(blkg, &blkcg_policy_cfq)); } +static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) +{ + struct blkcg_gq *pblkg = cfqg_to_blkg(cfqg)->parent; + + return pblkg ? blkg_to_cfqg(pblkg) : NULL; +} + static inline void cfqg_get(struct cfq_group *cfqg) { return blkg_get(cfqg_to_blkg(cfqg)); @@ -586,8 +628,9 @@ static inline void cfqg_put(struct cfq_group *cfqg) char __pbuf[128]; \ \ blkg_path(cfqg_to_blkg((cfqq)->cfqg), __pbuf, sizeof(__pbuf)); \ - blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \ - cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ + blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c %s " fmt, (cfqq)->pid, \ + cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ + cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\ __pbuf, ##args); \ } while (0) @@ -646,11 +689,9 @@ static inline void cfqg_stats_update_completion(struct cfq_group *cfqg, io_start_time - start_time); } -static void cfq_pd_reset_stats(struct blkcg_gq *blkg) +/* @stats = 0 */ +static void cfqg_stats_reset(struct cfqg_stats *stats) { - struct cfq_group *cfqg = blkg_to_cfqg(blkg); - struct cfqg_stats *stats = &cfqg->stats; - /* queued stats shouldn't be cleared */ blkg_rwstat_reset(&stats->service_bytes); blkg_rwstat_reset(&stats->serviced); @@ -669,13 +710,58 @@ static void cfq_pd_reset_stats(struct blkcg_gq *blkg) #endif } +/* @to += @from */ +static void cfqg_stats_merge(struct cfqg_stats *to, struct cfqg_stats *from) +{ + /* queued stats shouldn't be cleared */ + blkg_rwstat_merge(&to->service_bytes, &from->service_bytes); + blkg_rwstat_merge(&to->serviced, &from->serviced); + blkg_rwstat_merge(&to->merged, &from->merged); + blkg_rwstat_merge(&to->service_time, &from->service_time); + blkg_rwstat_merge(&to->wait_time, &from->wait_time); + blkg_stat_merge(&from->time, &from->time); +#ifdef CONFIG_DEBUG_BLK_CGROUP + blkg_stat_merge(&to->unaccounted_time, &from->unaccounted_time); + blkg_stat_merge(&to->avg_queue_size_sum, &from->avg_queue_size_sum); + blkg_stat_merge(&to->avg_queue_size_samples, &from->avg_queue_size_samples); + blkg_stat_merge(&to->dequeue, &from->dequeue); + blkg_stat_merge(&to->group_wait_time, &from->group_wait_time); + blkg_stat_merge(&to->idle_time, &from->idle_time); + blkg_stat_merge(&to->empty_time, &from->empty_time); +#endif +} + +/* + * Transfer @cfqg's stats to its parent's dead_stats so that the ancestors' + * recursive stats can still account for the amount used by this cfqg after + * it's gone. + */ +static void cfqg_stats_xfer_dead(struct cfq_group *cfqg) +{ + struct cfq_group *parent = cfqg_parent(cfqg); + + lockdep_assert_held(cfqg_to_blkg(cfqg)->q->queue_lock); + + if (unlikely(!parent)) + return; + + cfqg_stats_merge(&parent->dead_stats, &cfqg->stats); + cfqg_stats_merge(&parent->dead_stats, &cfqg->dead_stats); + cfqg_stats_reset(&cfqg->stats); + cfqg_stats_reset(&cfqg->dead_stats); +} + #else /* CONFIG_CFQ_GROUP_IOSCHED */ +static inline struct cfq_group *cfqg_parent(struct cfq_group *cfqg) { return NULL; } static inline void cfqg_get(struct cfq_group *cfqg) { } static inline void cfqg_put(struct cfq_group *cfqg) { } #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \ - blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args) + blk_add_trace_msg((cfqd)->queue, "cfq%d%c%c " fmt, (cfqq)->pid, \ + cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \ + cfqq_type((cfqq)) == SYNC_NOIDLE_WORKLOAD ? 'N' : ' ',\ + ##args) #define cfq_log_cfqg(cfqd, cfqg, fmt, args...) do {} while (0) static inline void cfqg_stats_update_io_add(struct cfq_group *cfqg, @@ -732,7 +818,7 @@ static inline bool iops_mode(struct cfq_data *cfqd) return false; } -static inline enum wl_prio_t cfqq_prio(struct cfq_queue *cfqq) +static inline enum wl_class_t cfqq_class(struct cfq_queue *cfqq) { if (cfq_class_idle(cfqq)) return IDLE_WORKLOAD; @@ -751,23 +837,23 @@ static enum wl_type_t cfqq_type(struct cfq_queue *cfqq) return SYNC_WORKLOAD; } -static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl, +static inline int cfq_group_busy_queues_wl(enum wl_class_t wl_class, struct cfq_data *cfqd, struct cfq_group *cfqg) { - if (wl == IDLE_WORKLOAD) + if (wl_class == IDLE_WORKLOAD) return cfqg->service_tree_idle.count; - return cfqg->service_trees[wl][ASYNC_WORKLOAD].count - + cfqg->service_trees[wl][SYNC_NOIDLE_WORKLOAD].count - + cfqg->service_trees[wl][SYNC_WORKLOAD].count; + return cfqg->service_trees[wl_class][ASYNC_WORKLOAD].count + + cfqg->service_trees[wl_class][SYNC_NOIDLE_WORKLOAD].count + + cfqg->service_trees[wl_class][SYNC_WORKLOAD].count; } static inline int cfqg_busy_async_queues(struct cfq_data *cfqd, struct cfq_group *cfqg) { - return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count - + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count; + return cfqg->service_trees[RT_WORKLOAD][ASYNC_WORKLOAD].count + + cfqg->service_trees[BE_WORKLOAD][ASYNC_WORKLOAD].count; } static void cfq_dispatch_insert(struct request_queue *, struct request *); @@ -847,13 +933,27 @@ cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) return cfq_prio_slice(cfqd, cfq_cfqq_sync(cfqq), cfqq->ioprio); } -static inline u64 cfq_scale_slice(unsigned long delta, struct cfq_group *cfqg) +/** + * cfqg_scale_charge - scale disk time charge according to cfqg weight + * @charge: disk time being charged + * @vfraction: vfraction of the cfqg, fixed point w/ CFQ_SERVICE_SHIFT + * + * Scale @charge according to @vfraction, which is in range (0, 1]. The + * scaling is inversely proportional. + * + * scaled = charge / vfraction + * + * The result is also in fixed point w/ CFQ_SERVICE_SHIFT. + */ +static inline u64 cfqg_scale_charge(unsigned long charge, + unsigned int vfraction) { - u64 d = delta << CFQ_SERVICE_SHIFT; + u64 c = charge << CFQ_SERVICE_SHIFT; /* make it fixed point */ - d = d * CFQ_WEIGHT_DEFAULT; - do_div(d, cfqg->weight); - return d; + /* charge / vfraction */ + c <<= CFQ_SERVICE_SHIFT; + do_div(c, vfraction); + return c; } static inline u64 max_vdisktime(u64 min_vdisktime, u64 vdisktime) @@ -909,9 +1009,7 @@ static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd, static inline unsigned cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) { - struct cfq_rb_root *st = &cfqd->grp_service_tree; - - return cfqd->cfq_target_latency * cfqg->weight / st->total_weight; + return cfqd->cfq_target_latency * cfqg->vfraction >> CFQ_SERVICE_SHIFT; } static inline unsigned @@ -1178,20 +1276,61 @@ static void cfq_update_group_weight(struct cfq_group *cfqg) { BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); + if (cfqg->new_weight) { cfqg->weight = cfqg->new_weight; cfqg->new_weight = 0; } + + if (cfqg->new_leaf_weight) { + cfqg->leaf_weight = cfqg->new_leaf_weight; + cfqg->new_leaf_weight = 0; + } } static void cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) { + unsigned int vfr = 1 << CFQ_SERVICE_SHIFT; /* start with 1 */ + struct cfq_group *pos = cfqg; + struct cfq_group *parent; + bool propagate; + + /* add to the service tree */ BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); cfq_update_group_weight(cfqg); __cfq_group_service_tree_add(st, cfqg); - st->total_weight += cfqg->weight; + + /* + * Activate @cfqg and calculate the portion of vfraction @cfqg is + * entitled to. vfraction is calculated by walking the tree + * towards the root calculating the fraction it has at each level. + * The compounded ratio is how much vfraction @cfqg owns. + * + * Start with the proportion tasks in this cfqg has against active + * children cfqgs - its leaf_weight against children_weight. + */ + propagate = !pos->nr_active++; + pos->children_weight += pos->leaf_weight; + vfr = vfr * pos->leaf_weight / pos->children_weight; + + /* + * Compound ->weight walking up the tree. Both activation and + * vfraction calculation are done in the same loop. Propagation + * stops once an already activated node is met. vfraction + * calculation should always continue to the root. + */ + while ((parent = cfqg_parent(pos))) { + if (propagate) { + propagate = !parent->nr_active++; + parent->children_weight += pos->weight; + } + vfr = vfr * pos->weight / parent->children_weight; + pos = parent; + } + + cfqg->vfraction = max_t(unsigned, vfr, 1); } static void @@ -1222,7 +1361,32 @@ cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) static void cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg) { - st->total_weight -= cfqg->weight; + struct cfq_group *pos = cfqg; + bool propagate; + + /* + * Undo activation from cfq_group_service_tree_add(). Deactivate + * @cfqg and propagate deactivation upwards. + */ + propagate = !--pos->nr_active; + pos->children_weight -= pos->leaf_weight; + + while (propagate) { + struct cfq_group *parent = cfqg_parent(pos); + + /* @pos has 0 nr_active at this point */ + WARN_ON_ONCE(pos->children_weight); + pos->vfraction = 0; + + if (!parent) + break; + + propagate = !--parent->nr_active; + parent->children_weight -= pos->weight; + pos = parent; + } + + /* remove from the service tree */ if (!RB_EMPTY_NODE(&cfqg->rb_node)) cfq_rb_erase(&cfqg->rb_node, st); } @@ -1241,7 +1405,7 @@ cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); cfq_group_service_tree_del(st, cfqg); - cfqg->saved_workload_slice = 0; + cfqg->saved_wl_slice = 0; cfqg_stats_update_dequeue(cfqg); } @@ -1284,6 +1448,7 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, unsigned int used_sl, charge, unaccounted_sl = 0; int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) - cfqg->service_tree_idle.count; + unsigned int vfr; BUG_ON(nr_sync < 0); used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); @@ -1293,20 +1458,25 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, else if (!cfq_cfqq_sync(cfqq) && !nr_sync) charge = cfqq->allocated_slice; - /* Can't update vdisktime while group is on service tree */ + /* + * Can't update vdisktime while on service tree and cfqg->vfraction + * is valid only while on it. Cache vfr, leave the service tree, + * update vdisktime and go back on. The re-addition to the tree + * will also update the weights as necessary. + */ + vfr = cfqg->vfraction; cfq_group_service_tree_del(st, cfqg); - cfqg->vdisktime += cfq_scale_slice(charge, cfqg); - /* If a new weight was requested, update now, off tree */ + cfqg->vdisktime += cfqg_scale_charge(charge, vfr); cfq_group_service_tree_add(st, cfqg); /* This group is being expired. Save the context */ if (time_after(cfqd->workload_expires, jiffies)) { - cfqg->saved_workload_slice = cfqd->workload_expires + cfqg->saved_wl_slice = cfqd->workload_expires - jiffies; - cfqg->saved_workload = cfqd->serving_type; - cfqg->saved_serving_prio = cfqd->serving_prio; + cfqg->saved_wl_type = cfqd->serving_wl_type; + cfqg->saved_wl_class = cfqd->serving_wl_class; } else - cfqg->saved_workload_slice = 0; + cfqg->saved_wl_slice = 0; cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime, st->min_vdisktime); @@ -1344,6 +1514,52 @@ static void cfq_pd_init(struct blkcg_gq *blkg) cfq_init_cfqg_base(cfqg); cfqg->weight = blkg->blkcg->cfq_weight; + cfqg->leaf_weight = blkg->blkcg->cfq_leaf_weight; +} + +static void cfq_pd_offline(struct blkcg_gq *blkg) +{ + /* + * @blkg is going offline and will be ignored by + * blkg_[rw]stat_recursive_sum(). Transfer stats to the parent so + * that they don't get lost. If IOs complete after this point, the + * stats for them will be lost. Oh well... + */ + cfqg_stats_xfer_dead(blkg_to_cfqg(blkg)); +} + +/* offset delta from cfqg->stats to cfqg->dead_stats */ +static const int dead_stats_off_delta = offsetof(struct cfq_group, dead_stats) - + offsetof(struct cfq_group, stats); + +/* to be used by recursive prfill, sums live and dead stats recursively */ +static u64 cfqg_stat_pd_recursive_sum(struct blkg_policy_data *pd, int off) +{ + u64 sum = 0; + + sum += blkg_stat_recursive_sum(pd, off); + sum += blkg_stat_recursive_sum(pd, off + dead_stats_off_delta); + return sum; +} + +/* to be used by recursive prfill, sums live and dead rwstats recursively */ +static struct blkg_rwstat cfqg_rwstat_pd_recursive_sum(struct blkg_policy_data *pd, + int off) +{ + struct blkg_rwstat a, b; + + a = blkg_rwstat_recursive_sum(pd, off); + b = blkg_rwstat_recursive_sum(pd, off + dead_stats_off_delta); + blkg_rwstat_merge(&a, &b); + return a; +} + +static void cfq_pd_reset_stats(struct blkcg_gq *blkg) +{ + struct cfq_group *cfqg = blkg_to_cfqg(blkg); + + cfqg_stats_reset(&cfqg->stats); + cfqg_stats_reset(&cfqg->dead_stats); } /* @@ -1400,6 +1616,26 @@ static int cfqg_print_weight_device(struct cgroup *cgrp, struct cftype *cft, return 0; } +static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + struct cfq_group *cfqg = pd_to_cfqg(pd); + + if (!cfqg->dev_leaf_weight) + return 0; + return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight); +} + +static int cfqg_print_leaf_weight_device(struct cgroup *cgrp, + struct cftype *cft, + struct seq_file *sf) +{ + blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), + cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq, 0, + false); + return 0; +} + static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { @@ -1407,8 +1643,16 @@ static int cfq_print_weight(struct cgroup *cgrp, struct cftype *cft, return 0; } -static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, - const char *buf) +static int cfq_print_leaf_weight(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + seq_printf(sf, "%u\n", + cgroup_to_blkcg(cgrp)->cfq_leaf_weight); + return 0; +} + +static int __cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, + const char *buf, bool is_leaf_weight) { struct blkcg *blkcg = cgroup_to_blkcg(cgrp); struct blkg_conf_ctx ctx; @@ -1422,8 +1666,13 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, ret = -EINVAL; cfqg = blkg_to_cfqg(ctx.blkg); if (!ctx.v || (ctx.v >= CFQ_WEIGHT_MIN && ctx.v <= CFQ_WEIGHT_MAX)) { - cfqg->dev_weight = ctx.v; - cfqg->new_weight = cfqg->dev_weight ?: blkcg->cfq_weight; + if (!is_leaf_weight) { + cfqg->dev_weight = ctx.v; + cfqg->new_weight = ctx.v ?: blkcg->cfq_weight; + } else { + cfqg->dev_leaf_weight = ctx.v; + cfqg->new_leaf_weight = ctx.v ?: blkcg->cfq_leaf_weight; + } ret = 0; } @@ -1431,7 +1680,20 @@ static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, return ret; } -static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) +static int cfqg_set_weight_device(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return __cfqg_set_weight_device(cgrp, cft, buf, false); +} + +static int cfqg_set_leaf_weight_device(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return __cfqg_set_weight_device(cgrp, cft, buf, true); +} + +static int __cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val, + bool is_leaf_weight) { struct blkcg *blkcg = cgroup_to_blkcg(cgrp); struct blkcg_gq *blkg; @@ -1440,19 +1702,41 @@ static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) return -EINVAL; spin_lock_irq(&blkcg->lock); - blkcg->cfq_weight = (unsigned int)val; + + if (!is_leaf_weight) + blkcg->cfq_weight = val; + else + blkcg->cfq_leaf_weight = val; hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { struct cfq_group *cfqg = blkg_to_cfqg(blkg); - if (cfqg && !cfqg->dev_weight) - cfqg->new_weight = blkcg->cfq_weight; + if (!cfqg) + continue; + + if (!is_leaf_weight) { + if (!cfqg->dev_weight) + cfqg->new_weight = blkcg->cfq_weight; + } else { + if (!cfqg->dev_leaf_weight) + cfqg->new_leaf_weight = blkcg->cfq_leaf_weight; + } } spin_unlock_irq(&blkcg->lock); return 0; } +static int cfq_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) +{ + return __cfq_set_weight(cgrp, cft, val, false); +} + +static int cfq_set_leaf_weight(struct cgroup *cgrp, struct cftype *cft, u64 val) +{ + return __cfq_set_weight(cgrp, cft, val, true); +} + static int cfqg_print_stat(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { @@ -1473,6 +1757,42 @@ static int cfqg_print_rwstat(struct cgroup *cgrp, struct cftype *cft, return 0; } +static u64 cfqg_prfill_stat_recursive(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + u64 sum = cfqg_stat_pd_recursive_sum(pd, off); + + return __blkg_prfill_u64(sf, pd, sum); +} + +static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + struct blkg_rwstat sum = cfqg_rwstat_pd_recursive_sum(pd, off); + + return __blkg_prfill_rwstat(sf, pd, &sum); +} + +static int cfqg_print_stat_recursive(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); + + blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive, + &blkcg_policy_cfq, cft->private, false); + return 0; +} + +static int cfqg_print_rwstat_recursive(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); + + blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive, + &blkcg_policy_cfq, cft->private, true); + return 0; +} + #ifdef CONFIG_DEBUG_BLK_CGROUP static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf, struct blkg_policy_data *pd, int off) @@ -1502,17 +1822,49 @@ static int cfqg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft, #endif /* CONFIG_DEBUG_BLK_CGROUP */ static struct cftype cfq_blkcg_files[] = { + /* on root, weight is mapped to leaf_weight */ + { + .name = "weight_device", + .flags = CFTYPE_ONLY_ON_ROOT, + .read_seq_string = cfqg_print_leaf_weight_device, + .write_string = cfqg_set_leaf_weight_device, + .max_write_len = 256, + }, + { + .name = "weight", + .flags = CFTYPE_ONLY_ON_ROOT, + .read_seq_string = cfq_print_leaf_weight, + .write_u64 = cfq_set_leaf_weight, + }, + + /* no such mapping necessary for !roots */ { .name = "weight_device", + .flags = CFTYPE_NOT_ON_ROOT, .read_seq_string = cfqg_print_weight_device, .write_string = cfqg_set_weight_device, .max_write_len = 256, }, { .name = "weight", + .flags = CFTYPE_NOT_ON_ROOT, .read_seq_string = cfq_print_weight, .write_u64 = cfq_set_weight, }, + + { + .name = "leaf_weight_device", + .read_seq_string = cfqg_print_leaf_weight_device, + .write_string = cfqg_set_leaf_weight_device, + .max_write_len = 256, + }, + { + .name = "leaf_weight", + .read_seq_string = cfq_print_leaf_weight, + .write_u64 = cfq_set_leaf_weight, + }, + + /* statistics, covers only the tasks in the cfqg */ { .name = "time", .private = offsetof(struct cfq_group, stats.time), @@ -1553,6 +1905,48 @@ static struct cftype cfq_blkcg_files[] = { .private = offsetof(struct cfq_group, stats.queued), .read_seq_string = cfqg_print_rwstat, }, + + /* the same statictics which cover the cfqg and its descendants */ + { + .name = "time_recursive", + .private = offsetof(struct cfq_group, stats.time), + .read_seq_string = cfqg_print_stat_recursive, + }, + { + .name = "sectors_recursive", + .private = offsetof(struct cfq_group, stats.sectors), + .read_seq_string = cfqg_print_stat_recursive, + }, + { + .name = "io_service_bytes_recursive", + .private = offsetof(struct cfq_group, stats.service_bytes), + .read_seq_string = cfqg_print_rwstat_recursive, + }, + { + .name = "io_serviced_recursive", + .private = offsetof(struct cfq_group, stats.serviced), + .read_seq_string = cfqg_print_rwstat_recursive, + }, + { + .name = "io_service_time_recursive", + .private = offsetof(struct cfq_group, stats.service_time), + .read_seq_string = cfqg_print_rwstat_recursive, + }, + { + .name = "io_wait_time_recursive", + .private = offsetof(struct cfq_group, stats.wait_time), + .read_seq_string = cfqg_print_rwstat_recursive, + }, + { + .name = "io_merged_recursive", + .private = offsetof(struct cfq_group, stats.merged), + .read_seq_string = cfqg_print_rwstat_recursive, + }, + { + .name = "io_queued_recursive", + .private = offsetof(struct cfq_group, stats.queued), + .read_seq_string = cfqg_print_rwstat_recursive, + }, #ifdef CONFIG_DEBUG_BLK_CGROUP { .name = "avg_queue_size", @@ -1611,15 +2005,14 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct rb_node **p, *parent; struct cfq_queue *__cfqq; unsigned long rb_key; - struct cfq_rb_root *service_tree; + struct cfq_rb_root *st; int left; int new_cfqq = 1; - service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), - cfqq_type(cfqq)); + st = st_for(cfqq->cfqg, cfqq_class(cfqq), cfqq_type(cfqq)); if (cfq_class_idle(cfqq)) { rb_key = CFQ_IDLE_DELAY; - parent = rb_last(&service_tree->rb); + parent = rb_last(&st->rb); if (parent && parent != &cfqq->rb_node) { __cfqq = rb_entry(parent, struct cfq_queue, rb_node); rb_key += __cfqq->rb_key; @@ -1637,7 +2030,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqq->slice_resid = 0; } else { rb_key = -HZ; - __cfqq = cfq_rb_first(service_tree); + __cfqq = cfq_rb_first(st); rb_key += __cfqq ? __cfqq->rb_key : jiffies; } @@ -1646,8 +2039,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, /* * same position, nothing more to do */ - if (rb_key == cfqq->rb_key && - cfqq->service_tree == service_tree) + if (rb_key == cfqq->rb_key && cfqq->service_tree == st) return; cfq_rb_erase(&cfqq->rb_node, cfqq->service_tree); @@ -1656,11 +2048,9 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, left = 1; parent = NULL; - cfqq->service_tree = service_tree; - p = &service_tree->rb.rb_node; + cfqq->service_tree = st; + p = &st->rb.rb_node; while (*p) { - struct rb_node **n; - parent = *p; __cfqq = rb_entry(parent, struct cfq_queue, rb_node); @@ -1668,22 +2058,20 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, * sort by key, that represents service time. */ if (time_before(rb_key, __cfqq->rb_key)) - n = &(*p)->rb_left; + p = &parent->rb_left; else { - n = &(*p)->rb_right; + p = &parent->rb_right; left = 0; } - - p = n; } if (left) - service_tree->left = &cfqq->rb_node; + st->left = &cfqq->rb_node; cfqq->rb_key = rb_key; rb_link_node(&cfqq->rb_node, parent, p); - rb_insert_color(&cfqq->rb_node, &service_tree->rb); - service_tree->count++; + rb_insert_color(&cfqq->rb_node, &st->rb); + st->count++; if (add_front || !new_cfqq) return; cfq_group_notify_queue_add(cfqd, cfqq->cfqg); @@ -2029,8 +2417,8 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) { if (cfqq) { - cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d", - cfqd->serving_prio, cfqd->serving_type); + cfq_log_cfqq(cfqd, cfqq, "set_active wl_class:%d wl_type:%d", + cfqd->serving_wl_class, cfqd->serving_wl_type); cfqg_stats_update_avg_queue_size(cfqq->cfqg); cfqq->slice_start = 0; cfqq->dispatch_start = jiffies; @@ -2116,19 +2504,18 @@ static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out) */ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd) { - struct cfq_rb_root *service_tree = - service_tree_for(cfqd->serving_group, cfqd->serving_prio, - cfqd->serving_type); + struct cfq_rb_root *st = st_for(cfqd->serving_group, + cfqd->serving_wl_class, cfqd->serving_wl_type); if (!cfqd->rq_queued) return NULL; /* There is nothing to dispatch */ - if (!service_tree) + if (!st) return NULL; - if (RB_EMPTY_ROOT(&service_tree->rb)) + if (RB_EMPTY_ROOT(&st->rb)) return NULL; - return cfq_rb_first(service_tree); + return cfq_rb_first(st); } static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd) @@ -2284,17 +2671,17 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - enum wl_prio_t prio = cfqq_prio(cfqq); - struct cfq_rb_root *service_tree = cfqq->service_tree; + enum wl_class_t wl_class = cfqq_class(cfqq); + struct cfq_rb_root *st = cfqq->service_tree; - BUG_ON(!service_tree); - BUG_ON(!service_tree->count); + BUG_ON(!st); + BUG_ON(!st->count); if (!cfqd->cfq_slice_idle) return false; /* We never do for idle class queues. */ - if (prio == IDLE_WORKLOAD) + if (wl_class == IDLE_WORKLOAD) return false; /* We do for queues that were marked with idle window flag. */ @@ -2306,11 +2693,10 @@ static bool cfq_should_idle(struct cfq_data *cfqd, struct cfq_queue *cfqq) * Otherwise, we do only if they are the last ones * in their service tree. */ - if (service_tree->count == 1 && cfq_cfqq_sync(cfqq) && - !cfq_io_thinktime_big(cfqd, &service_tree->ttime, false)) + if (st->count == 1 && cfq_cfqq_sync(cfqq) && + !cfq_io_thinktime_big(cfqd, &st->ttime, false)) return true; - cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", - service_tree->count); + cfq_log_cfqq(cfqd, cfqq, "Not idling. st->count:%d", st->count); return false; } @@ -2493,8 +2879,8 @@ static void cfq_setup_merge(struct cfq_queue *cfqq, struct cfq_queue *new_cfqq) } } -static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, - struct cfq_group *cfqg, enum wl_prio_t prio) +static enum wl_type_t cfq_choose_wl_type(struct cfq_data *cfqd, + struct cfq_group *cfqg, enum wl_class_t wl_class) { struct cfq_queue *queue; int i; @@ -2504,7 +2890,7 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, for (i = 0; i <= SYNC_WORKLOAD; ++i) { /* select the one with lowest rb_key */ - queue = cfq_rb_first(service_tree_for(cfqg, prio, i)); + queue = cfq_rb_first(st_for(cfqg, wl_class, i)); if (queue && (!key_valid || time_before(queue->rb_key, lowest_key))) { lowest_key = queue->rb_key; @@ -2516,26 +2902,27 @@ static enum wl_type_t cfq_choose_wl(struct cfq_data *cfqd, return cur_best; } -static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) +static void +choose_wl_class_and_type(struct cfq_data *cfqd, struct cfq_group *cfqg) { unsigned slice; unsigned count; struct cfq_rb_root *st; unsigned group_slice; - enum wl_prio_t original_prio = cfqd->serving_prio; + enum wl_class_t original_class = cfqd->serving_wl_class; /* Choose next priority. RT > BE > IDLE */ if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg)) - cfqd->serving_prio = RT_WORKLOAD; + cfqd->serving_wl_class = RT_WORKLOAD; else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg)) - cfqd->serving_prio = BE_WORKLOAD; + cfqd->serving_wl_class = BE_WORKLOAD; else { - cfqd->serving_prio = IDLE_WORKLOAD; + cfqd->serving_wl_class = IDLE_WORKLOAD; cfqd->workload_expires = jiffies + 1; return; } - if (original_prio != cfqd->serving_prio) + if (original_class != cfqd->serving_wl_class) goto new_workload; /* @@ -2543,7 +2930,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) * (SYNC, SYNC_NOIDLE, ASYNC), and to compute a workload * expiration time */ - st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); + st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type); count = st->count; /* @@ -2554,9 +2941,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) new_workload: /* otherwise select new workload type */ - cfqd->serving_type = - cfq_choose_wl(cfqd, cfqg, cfqd->serving_prio); - st = service_tree_for(cfqg, cfqd->serving_prio, cfqd->serving_type); + cfqd->serving_wl_type = cfq_choose_wl_type(cfqd, cfqg, + cfqd->serving_wl_class); + st = st_for(cfqg, cfqd->serving_wl_class, cfqd->serving_wl_type); count = st->count; /* @@ -2567,10 +2954,11 @@ new_workload: group_slice = cfq_group_slice(cfqd, cfqg); slice = group_slice * count / - max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio], - cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg)); + max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_wl_class], + cfq_group_busy_queues_wl(cfqd->serving_wl_class, cfqd, + cfqg)); - if (cfqd->serving_type == ASYNC_WORKLOAD) { + if (cfqd->serving_wl_type == ASYNC_WORKLOAD) { unsigned int tmp; /* @@ -2616,14 +3004,14 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd) cfqd->serving_group = cfqg; /* Restore the workload type data */ - if (cfqg->saved_workload_slice) { - cfqd->workload_expires = jiffies + cfqg->saved_workload_slice; - cfqd->serving_type = cfqg->saved_workload; - cfqd->serving_prio = cfqg->saved_serving_prio; + if (cfqg->saved_wl_slice) { + cfqd->workload_expires = jiffies + cfqg->saved_wl_slice; + cfqd->serving_wl_type = cfqg->saved_wl_type; + cfqd->serving_wl_class = cfqg->saved_wl_class; } else cfqd->workload_expires = jiffies - 1; - choose_service_tree(cfqd, cfqg); + choose_wl_class_and_type(cfqd, cfqg); } /* @@ -3205,6 +3593,8 @@ retry: spin_lock_irq(cfqd->queue->queue_lock); if (new_cfqq) goto retry; + else + return &cfqd->oom_cfqq; } else { cfqq = kmem_cache_alloc_node(cfq_pool, gfp_mask | __GFP_ZERO, @@ -3402,7 +3792,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, return true; /* Allow preemption only if we are idling on sync-noidle tree */ - if (cfqd->serving_type == SYNC_NOIDLE_WORKLOAD && + if (cfqd->serving_wl_type == SYNC_NOIDLE_WORKLOAD && cfqq_type(new_cfqq) == SYNC_NOIDLE_WORKLOAD && new_cfqq->service_tree->count == 2 && RB_EMPTY_ROOT(&cfqq->sort_list)) @@ -3454,7 +3844,7 @@ static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) * doesn't happen */ if (old_type != cfqq_type(cfqq)) - cfqq->cfqg->saved_workload_slice = 0; + cfqq->cfqg->saved_wl_slice = 0; /* * Put the new queue at the front of the of the current list, @@ -3636,16 +4026,17 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--; if (sync) { - struct cfq_rb_root *service_tree; + struct cfq_rb_root *st; RQ_CIC(rq)->ttime.last_end_request = now; if (cfq_cfqq_on_rr(cfqq)) - service_tree = cfqq->service_tree; + st = cfqq->service_tree; else - service_tree = service_tree_for(cfqq->cfqg, - cfqq_prio(cfqq), cfqq_type(cfqq)); - service_tree->ttime.last_end_request = now; + st = st_for(cfqq->cfqg, cfqq_class(cfqq), + cfqq_type(cfqq)); + + st->ttime.last_end_request = now; if (!time_after(rq->start_time + cfqd->cfq_fifo_expire[1], now)) cfqd->last_delayed_sync = now; } @@ -3992,6 +4383,7 @@ static int cfq_init_queue(struct request_queue *q) cfq_init_cfqg_base(cfqd->root_group); #endif cfqd->root_group->weight = 2 * CFQ_WEIGHT_DEFAULT; + cfqd->root_group->leaf_weight = 2 * CFQ_WEIGHT_DEFAULT; /* * Not strictly needed (since RB_ROOT just clears the node and we @@ -4176,6 +4568,7 @@ static struct blkcg_policy blkcg_policy_cfq = { .cftypes = cfq_blkcg_files, .pd_init_fn = cfq_pd_init, + .pd_offline_fn = cfq_pd_offline, .pd_reset_stats_fn = cfq_pd_reset_stats, }; #endif diff --git a/block/elevator.c b/block/elevator.c index d0acb31cc08..a0ffdd943c9 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -46,11 +46,6 @@ static LIST_HEAD(elv_list); /* * Merge hash stuff. */ -static const int elv_hash_shift = 6; -#define ELV_HASH_BLOCK(sec) ((sec) >> 3) -#define ELV_HASH_FN(sec) \ - (hash_long(ELV_HASH_BLOCK((sec)), elv_hash_shift)) -#define ELV_HASH_ENTRIES (1 << elv_hash_shift) #define rq_hash_key(rq) (blk_rq_pos(rq) + blk_rq_sectors(rq)) /* @@ -158,7 +153,6 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q, struct elevator_type *e) { struct elevator_queue *eq; - int i; eq = kmalloc_node(sizeof(*eq), GFP_KERNEL | __GFP_ZERO, q->node); if (unlikely(!eq)) @@ -167,14 +161,7 @@ static struct elevator_queue *elevator_alloc(struct request_queue *q, eq->type = e; kobject_init(&eq->kobj, &elv_ktype); mutex_init(&eq->sysfs_lock); - - eq->hash = kmalloc_node(sizeof(struct hlist_head) * ELV_HASH_ENTRIES, - GFP_KERNEL, q->node); - if (!eq->hash) - goto err; - - for (i = 0; i < ELV_HASH_ENTRIES; i++) - INIT_HLIST_HEAD(&eq->hash[i]); + hash_init(eq->hash); return eq; err: @@ -189,7 +176,6 @@ static void elevator_release(struct kobject *kobj) e = container_of(kobj, struct elevator_queue, kobj); elevator_put(e->type); - kfree(e->hash); kfree(e); } @@ -261,7 +247,7 @@ EXPORT_SYMBOL(elevator_exit); static inline void __elv_rqhash_del(struct request *rq) { - hlist_del_init(&rq->hash); + hash_del(&rq->hash); } static void elv_rqhash_del(struct request_queue *q, struct request *rq) @@ -275,7 +261,7 @@ static void elv_rqhash_add(struct request_queue *q, struct request *rq) struct elevator_queue *e = q->elevator; BUG_ON(ELV_ON_HASH(rq)); - hlist_add_head(&rq->hash, &e->hash[ELV_HASH_FN(rq_hash_key(rq))]); + hash_add(e->hash, &rq->hash, rq_hash_key(rq)); } static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) @@ -287,11 +273,10 @@ static void elv_rqhash_reposition(struct request_queue *q, struct request *rq) static struct request *elv_rqhash_find(struct request_queue *q, sector_t offset) { struct elevator_queue *e = q->elevator; - struct hlist_head *hash_list = &e->hash[ELV_HASH_FN(offset)]; struct hlist_node *next; struct request *rq; - hlist_for_each_entry_safe(rq, next, hash_list, hash) { + hash_for_each_possible_safe(e->hash, rq, next, hash, offset) { BUG_ON(!ELV_ON_HASH(rq)); if (unlikely(!rq_mergeable(rq))) { diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 7ae2750bb45..d668a8ae602 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -48,8 +48,8 @@ #include <linux/genalloc.h> #include <linux/pci.h> #include <linux/aer.h> -#include <acpi/apei.h> -#include <acpi/hed.h> + +#include <acpi/ghes.h> #include <asm/mce.h> #include <asm/tlbflush.h> #include <asm/nmi.h> @@ -84,42 +84,6 @@ ((struct acpi_hest_generic_status *) \ ((struct ghes_estatus_node *)(estatus_node) + 1)) -/* - * One struct ghes is created for each generic hardware error source. - * It provides the context for APEI hardware error timer/IRQ/SCI/NMI - * handler. - * - * estatus: memory buffer for error status block, allocated during - * HEST parsing. - */ -#define GHES_TO_CLEAR 0x0001 -#define GHES_EXITING 0x0002 - -struct ghes { - struct acpi_hest_generic *generic; - struct acpi_hest_generic_status *estatus; - u64 buffer_paddr; - unsigned long flags; - union { - struct list_head list; - struct timer_list timer; - unsigned int irq; - }; -}; - -struct ghes_estatus_node { - struct llist_node llnode; - struct acpi_hest_generic *generic; -}; - -struct ghes_estatus_cache { - u32 estatus_len; - atomic_t count; - struct acpi_hest_generic *generic; - unsigned long long time_in; - struct rcu_head rcu; -}; - bool ghes_disable; module_param_named(disable, ghes_disable, bool, 0); @@ -333,13 +297,6 @@ static void ghes_fini(struct ghes *ghes) apei_unmap_generic_address(&ghes->generic->error_status_address); } -enum { - GHES_SEV_NO = 0x0, - GHES_SEV_CORRECTED = 0x1, - GHES_SEV_RECOVERABLE = 0x2, - GHES_SEV_PANIC = 0x3, -}; - static inline int ghes_severity(int severity) { switch (severity) { @@ -452,7 +409,8 @@ static void ghes_clear_estatus(struct ghes *ghes) ghes->flags &= ~GHES_TO_CLEAR; } -static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) +static void ghes_do_proc(struct ghes *ghes, + const struct acpi_hest_generic_status *estatus) { int sev, sec_sev; struct acpi_hest_generic_data *gdata; @@ -464,6 +422,8 @@ static void ghes_do_proc(const struct acpi_hest_generic_status *estatus) CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err; mem_err = (struct cper_sec_mem_err *)(gdata+1); + ghes_edac_report_mem_error(ghes, sev, mem_err); + #ifdef CONFIG_X86_MCE apei_mce_report_mem_error(sev == GHES_SEV_CORRECTED, mem_err); @@ -682,7 +642,7 @@ static int ghes_proc(struct ghes *ghes) if (ghes_print_estatus(NULL, ghes->generic, ghes->estatus)) ghes_estatus_cache_add(ghes->generic, ghes->estatus); } - ghes_do_proc(ghes->estatus); + ghes_do_proc(ghes, ghes->estatus); out: ghes_clear_estatus(ghes); return 0; @@ -775,7 +735,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) estatus = GHES_ESTATUS_FROM_NODE(estatus_node); len = apei_estatus_len(estatus); node_len = GHES_ESTATUS_NODE_LEN(len); - ghes_do_proc(estatus); + ghes_do_proc(estatus_node->ghes, estatus); if (!ghes_estatus_cached(estatus)) { generic = estatus_node->generic; if (ghes_print_estatus(NULL, generic, estatus)) @@ -864,6 +824,7 @@ static int ghes_notify_nmi(unsigned int cmd, struct pt_regs *regs) estatus_node = (void *)gen_pool_alloc(ghes_estatus_pool, node_len); if (estatus_node) { + estatus_node->ghes = ghes; estatus_node->generic = ghes->generic; estatus = GHES_ESTATUS_FROM_NODE(estatus_node); memcpy(estatus, ghes->estatus, len); @@ -942,6 +903,11 @@ static int ghes_probe(struct platform_device *ghes_dev) ghes = NULL; goto err; } + + rc = ghes_edac_register(ghes, &ghes_dev->dev); + if (rc < 0) + goto err; + switch (generic->notify.type) { case ACPI_HEST_NOTIFY_POLLED: ghes->timer.function = ghes_poll_func; @@ -954,13 +920,13 @@ static int ghes_probe(struct platform_device *ghes_dev) if (acpi_gsi_to_irq(generic->notify.vector, &ghes->irq)) { pr_err(GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n", generic->header.source_id); - goto err; + goto err_edac_unreg; } if (request_irq(ghes->irq, ghes_irq_func, 0, "GHES IRQ", ghes)) { pr_err(GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n", generic->header.source_id); - goto err; + goto err_edac_unreg; } break; case ACPI_HEST_NOTIFY_SCI: @@ -986,6 +952,8 @@ static int ghes_probe(struct platform_device *ghes_dev) platform_set_drvdata(ghes_dev, ghes); return 0; +err_edac_unreg: + ghes_edac_unregister(ghes); err: if (ghes) { ghes_fini(ghes); @@ -1038,6 +1006,9 @@ static int ghes_remove(struct platform_device *ghes_dev) } ghes_fini(ghes); + + ghes_edac_unregister(ghes); + kfree(ghes); platform_set_drvdata(ghes_dev, NULL); diff --git a/drivers/base/dma-buf.c b/drivers/base/dma-buf.c index ff5b745c470..2a7cb0df176 100644 --- a/drivers/base/dma-buf.c +++ b/drivers/base/dma-buf.c @@ -39,6 +39,8 @@ static int dma_buf_release(struct inode *inode, struct file *file) dmabuf = file->private_data; + BUG_ON(dmabuf->vmapping_counter); + dmabuf->ops->release(dmabuf); kfree(dmabuf); return 0; @@ -445,6 +447,9 @@ EXPORT_SYMBOL_GPL(dma_buf_kunmap); int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, unsigned long pgoff) { + struct file *oldfile; + int ret; + if (WARN_ON(!dmabuf || !vma)) return -EINVAL; @@ -458,14 +463,22 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma, return -EINVAL; /* readjust the vma */ - if (vma->vm_file) - fput(vma->vm_file); - - vma->vm_file = get_file(dmabuf->file); - + get_file(dmabuf->file); + oldfile = vma->vm_file; + vma->vm_file = dmabuf->file; vma->vm_pgoff = pgoff; - return dmabuf->ops->mmap(dmabuf, vma); + ret = dmabuf->ops->mmap(dmabuf, vma); + if (ret) { + /* restore old parameters on failure */ + vma->vm_file = oldfile; + fput(dmabuf->file); + } else { + if (oldfile) + fput(oldfile); + } + return ret; + } EXPORT_SYMBOL_GPL(dma_buf_mmap); @@ -481,12 +494,34 @@ EXPORT_SYMBOL_GPL(dma_buf_mmap); */ void *dma_buf_vmap(struct dma_buf *dmabuf) { + void *ptr; + if (WARN_ON(!dmabuf)) return NULL; - if (dmabuf->ops->vmap) - return dmabuf->ops->vmap(dmabuf); - return NULL; + if (!dmabuf->ops->vmap) + return NULL; + + mutex_lock(&dmabuf->lock); + if (dmabuf->vmapping_counter) { + dmabuf->vmapping_counter++; + BUG_ON(!dmabuf->vmap_ptr); + ptr = dmabuf->vmap_ptr; + goto out_unlock; + } + + BUG_ON(dmabuf->vmap_ptr); + + ptr = dmabuf->ops->vmap(dmabuf); + if (IS_ERR_OR_NULL(ptr)) + goto out_unlock; + + dmabuf->vmap_ptr = ptr; + dmabuf->vmapping_counter = 1; + +out_unlock: + mutex_unlock(&dmabuf->lock); + return ptr; } EXPORT_SYMBOL_GPL(dma_buf_vmap); @@ -500,7 +535,16 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr) if (WARN_ON(!dmabuf)) return; - if (dmabuf->ops->vunmap) - dmabuf->ops->vunmap(dmabuf, vaddr); + BUG_ON(!dmabuf->vmap_ptr); + BUG_ON(dmabuf->vmapping_counter == 0); + BUG_ON(dmabuf->vmap_ptr != vaddr); + + mutex_lock(&dmabuf->lock); + if (--dmabuf->vmapping_counter == 0) { + if (dmabuf->ops->vunmap) + dmabuf->ops->vunmap(dmabuf, vaddr); + dmabuf->vmap_ptr = NULL; + } + mutex_unlock(&dmabuf->lock); } EXPORT_SYMBOL_GPL(dma_buf_vunmap); diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 8f12dc78a84..5b5ee79ff23 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -7054,6 +7054,7 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request, else ErrorCode = 0; } + break; default: ErrorCode = -ENOTTY; } diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 824e09c4d0d..5dc0daed8fa 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -63,19 +63,6 @@ config AMIGA_Z2RAM To compile this driver as a module, choose M here: the module will be called z2ram. -config BLK_DEV_XD - tristate "XT hard disk support" - depends on ISA && ISA_DMA_API - select CHECK_SIGNATURE - help - Very old 8 bit hard disk controllers used in the IBM XT computer - will be supported if you say Y here. - - To compile this driver as a module, choose M here: the - module will be called xd. - - It's pretty unlikely that you have one of these: say N. - config GDROM tristate "SEGA Dreamcast GD-ROM drive" depends on SH_DREAMCAST @@ -544,4 +531,14 @@ config BLK_DEV_RBD If unsure, say N. +config BLK_DEV_RSXX + tristate "RamSam PCIe Flash SSD Device Driver" + depends on PCI + help + Device driver for IBM's high speed PCIe SSD + storage devices: RamSan-70 and RamSan-80. + + To compile this driver as a module, choose M here: the + module will be called rsxx. + endif # BLK_DEV diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 17e82df3df7..a3b40232c6a 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_ATARI_FLOPPY) += ataflop.o obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o -obj-$(CONFIG_BLK_DEV_XD) += xd.o obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o obj-$(CONFIG_BLK_CPQ_CISS_DA) += cciss.o obj-$(CONFIG_BLK_DEV_DAC960) += DAC960.o @@ -41,4 +40,6 @@ obj-$(CONFIG_BLK_DEV_DRBD) += drbd/ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ +obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ + swim_mod-y := swim.o swim_asm.o diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f47dccbda1d..747bb2af69d 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -162,12 +162,13 @@ static struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = { static loff_t get_size(loff_t offset, loff_t sizelimit, struct file *file) { - loff_t size, loopsize; + loff_t loopsize; /* Compute loopsize in bytes */ - size = i_size_read(file->f_mapping->host); - loopsize = size - offset; - /* offset is beyond i_size, wierd but possible */ + loopsize = i_size_read(file->f_mapping->host); + if (offset > 0) + loopsize -= offset; + /* offset is beyond i_size, weird but possible */ if (loopsize < 0) return 0; @@ -190,6 +191,7 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) { loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); sector_t x = (sector_t)size; + struct block_device *bdev = lo->lo_device; if (unlikely((loff_t)x != size)) return -EFBIG; @@ -198,6 +200,9 @@ figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) if (lo->lo_sizelimit != sizelimit) lo->lo_sizelimit = sizelimit; set_capacity(lo->lo_disk, x); + bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); + /* let user-space know about the new size */ + kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); return 0; } @@ -1091,10 +1096,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) return err; if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit) { + lo->lo_sizelimit != info->lo_sizelimit) if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) return -EFBIG; - } + loop_config_discard(lo); memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); @@ -1271,28 +1276,10 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { static int loop_set_capacity(struct loop_device *lo, struct block_device *bdev) { - int err; - sector_t sec; - loff_t sz; - - err = -ENXIO; if (unlikely(lo->lo_state != Lo_bound)) - goto out; - err = figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); - if (unlikely(err)) - goto out; - sec = get_capacity(lo->lo_disk); - /* the width of sector_t may be narrow for bit-shift */ - sz = sec; - sz <<= 9; - mutex_lock(&bdev->bd_mutex); - bd_set_size(bdev, sz); - /* let user-space know about the new size */ - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); - mutex_unlock(&bdev->bd_mutex); + return -ENXIO; - out: - return err; + return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); } static int lo_ioctl(struct block_device *bdev, fmode_t mode, @@ -1845,11 +1832,15 @@ static int __init loop_init(void) max_part = (1UL << part_shift) - 1; } - if ((1UL << part_shift) > DISK_MAX_PARTS) - return -EINVAL; + if ((1UL << part_shift) > DISK_MAX_PARTS) { + err = -EINVAL; + goto misc_out; + } - if (max_loop > 1UL << (MINORBITS - part_shift)) - return -EINVAL; + if (max_loop > 1UL << (MINORBITS - part_shift)) { + err = -EINVAL; + goto misc_out; + } /* * If max_loop is specified, create that many devices upfront. @@ -1867,8 +1858,10 @@ static int __init loop_init(void) range = 1UL << MINORBITS; } - if (register_blkdev(LOOP_MAJOR, "loop")) - return -EIO; + if (register_blkdev(LOOP_MAJOR, "loop")) { + err = -EIO; + goto misc_out; + } blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); @@ -1881,6 +1874,10 @@ static int __init loop_init(void) printk(KERN_INFO "loop: module loaded\n"); return 0; + +misc_out: + misc_deregister(&loop_misc); + return err; } static int loop_exit_cb(int id, void *ptr, void *data) diff --git a/drivers/block/mtip32xx/Kconfig b/drivers/block/mtip32xx/Kconfig index 0ba837fc62a..1fca1f996b4 100644 --- a/drivers/block/mtip32xx/Kconfig +++ b/drivers/block/mtip32xx/Kconfig @@ -4,6 +4,6 @@ config BLK_DEV_PCIESSD_MTIP32XX tristate "Block Device Driver for Micron PCIe SSDs" - depends on PCI + depends on PCI && GENERIC_HARDIRQS help This enables the block driver for Micron PCIe SSDs. diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3fd10099045..11cc9522cdd 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -88,6 +88,8 @@ static int instance; static int mtip_major; static struct dentry *dfs_parent; +static u32 cpu_use[NR_CPUS]; + static DEFINE_SPINLOCK(rssd_index_lock); static DEFINE_IDA(rssd_index_ida); @@ -296,16 +298,17 @@ static int hba_reset_nosleep(struct driver_data *dd) */ static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag) { - atomic_set(&port->commands[tag].active, 1); + int group = tag >> 5; - spin_lock(&port->cmd_issue_lock); + atomic_set(&port->commands[tag].active, 1); + /* guard SACT and CI registers */ + spin_lock(&port->cmd_issue_lock[group]); writel((1 << MTIP_TAG_BIT(tag)), port->s_active[MTIP_TAG_INDEX(tag)]); writel((1 << MTIP_TAG_BIT(tag)), port->cmd_issue[MTIP_TAG_INDEX(tag)]); - - spin_unlock(&port->cmd_issue_lock); + spin_unlock(&port->cmd_issue_lock[group]); /* Set the command's timeout value.*/ port->commands[tag].comp_time = jiffies + msecs_to_jiffies( @@ -964,56 +967,56 @@ handle_tfe_exit: /* * Handle a set device bits interrupt */ -static inline void mtip_process_sdbf(struct driver_data *dd) +static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, + u32 completed) { - struct mtip_port *port = dd->port; - int group, tag, bit; - u32 completed; + struct driver_data *dd = port->dd; + int tag, bit; struct mtip_cmd *command; - /* walk all bits in all slot groups */ - for (group = 0; group < dd->slot_groups; group++) { - completed = readl(port->completed[group]); - if (!completed) - continue; + if (!completed) { + WARN_ON_ONCE(!completed); + return; + } + /* clear completed status register in the hardware.*/ + writel(completed, port->completed[group]); - /* clear completed status register in the hardware.*/ - writel(completed, port->completed[group]); + /* Process completed commands. */ + for (bit = 0; (bit < 32) && completed; bit++) { + if (completed & 0x01) { + tag = (group << 5) | bit; - /* Process completed commands. */ - for (bit = 0; - (bit < 32) && completed; - bit++, completed >>= 1) { - if (completed & 0x01) { - tag = (group << 5) | bit; + /* skip internal command slot. */ + if (unlikely(tag == MTIP_TAG_INTERNAL)) + continue; - /* skip internal command slot. */ - if (unlikely(tag == MTIP_TAG_INTERNAL)) - continue; + command = &port->commands[tag]; + /* make internal callback */ + if (likely(command->comp_func)) { + command->comp_func( + port, + tag, + command->comp_data, + 0); + } else { + dev_warn(&dd->pdev->dev, + "Null completion " + "for tag %d", + tag); - command = &port->commands[tag]; - /* make internal callback */ - if (likely(command->comp_func)) { - command->comp_func( - port, - tag, - command->comp_data, - 0); - } else { - dev_warn(&dd->pdev->dev, - "Null completion " - "for tag %d", - tag); - - if (mtip_check_surprise_removal( - dd->pdev)) { - mtip_command_cleanup(dd); - return; - } + if (mtip_check_surprise_removal( + dd->pdev)) { + mtip_command_cleanup(dd); + return; } } } + completed >>= 1; } + + /* If last, re-enable interrupts */ + if (atomic_dec_return(&dd->irq_workers_active) == 0) + writel(0xffffffff, dd->mmio + HOST_IRQ_STAT); } /* @@ -1072,6 +1075,8 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) struct mtip_port *port = dd->port; u32 hba_stat, port_stat; int rv = IRQ_NONE; + int do_irq_enable = 1, i, workers; + struct mtip_work *twork; hba_stat = readl(dd->mmio + HOST_IRQ_STAT); if (hba_stat) { @@ -1082,8 +1087,42 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) writel(port_stat, port->mmio + PORT_IRQ_STAT); /* Demux port status */ - if (likely(port_stat & PORT_IRQ_SDB_FIS)) - mtip_process_sdbf(dd); + if (likely(port_stat & PORT_IRQ_SDB_FIS)) { + do_irq_enable = 0; + WARN_ON_ONCE(atomic_read(&dd->irq_workers_active) != 0); + + /* Start at 1: group zero is always local? */ + for (i = 0, workers = 0; i < MTIP_MAX_SLOT_GROUPS; + i++) { + twork = &dd->work[i]; + twork->completed = readl(port->completed[i]); + if (twork->completed) + workers++; + } + + atomic_set(&dd->irq_workers_active, workers); + if (workers) { + for (i = 1; i < MTIP_MAX_SLOT_GROUPS; i++) { + twork = &dd->work[i]; + if (twork->completed) + queue_work_on( + twork->cpu_binding, + dd->isr_workq, + &twork->work); + } + + if (likely(dd->work[0].completed)) + mtip_workq_sdbfx(port, 0, + dd->work[0].completed); + + } else { + /* + * Chip quirk: SDB interrupt but nothing + * to complete + */ + do_irq_enable = 1; + } + } if (unlikely(port_stat & PORT_IRQ_ERR)) { if (unlikely(mtip_check_surprise_removal(dd->pdev))) { @@ -1103,21 +1142,13 @@ static inline irqreturn_t mtip_handle_irq(struct driver_data *data) } /* acknowledge interrupt */ - writel(hba_stat, dd->mmio + HOST_IRQ_STAT); + if (unlikely(do_irq_enable)) + writel(hba_stat, dd->mmio + HOST_IRQ_STAT); return rv; } /* - * Wrapper for mtip_handle_irq - * (ignores return code) - */ -static void mtip_tasklet(unsigned long data) -{ - mtip_handle_irq((struct driver_data *) data); -} - -/* * HBA interrupt subroutine. * * @irq IRQ number. @@ -1130,8 +1161,8 @@ static void mtip_tasklet(unsigned long data) static irqreturn_t mtip_irq_handler(int irq, void *instance) { struct driver_data *dd = instance; - tasklet_schedule(&dd->tasklet); - return IRQ_HANDLED; + + return mtip_handle_irq(dd); } static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) @@ -1489,6 +1520,12 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) } #endif + /* Demux ID.DRAT & ID.RZAT to determine trim support */ + if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5)) + port->dd->trim_supp = true; + else + port->dd->trim_supp = false; + /* Set the identify buffer as valid. */ port->identify_valid = 1; @@ -1676,6 +1713,81 @@ static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id, } /* + * Trim unused sectors + * + * @dd pointer to driver_data structure + * @lba starting lba + * @len # of 512b sectors to trim + * + * return value + * -ENOMEM Out of dma memory + * -EINVAL Invalid parameters passed in, trim not supported + * -EIO Error submitting trim request to hw + */ +static int mtip_send_trim(struct driver_data *dd, unsigned int lba, unsigned int len) +{ + int i, rv = 0; + u64 tlba, tlen, sect_left; + struct mtip_trim_entry *buf; + dma_addr_t dma_addr; + struct host_to_dev_fis fis; + + if (!len || dd->trim_supp == false) + return -EINVAL; + + /* Trim request too big */ + WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES)); + + /* Trim request not aligned on 4k boundary */ + WARN_ON(len % 8 != 0); + + /* Warn if vu_trim structure is too big */ + WARN_ON(sizeof(struct mtip_trim) > ATA_SECT_SIZE); + + /* Allocate a DMA buffer for the trim structure */ + buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr, + GFP_KERNEL); + if (!buf) + return -ENOMEM; + memset(buf, 0, ATA_SECT_SIZE); + + for (i = 0, sect_left = len, tlba = lba; + i < MTIP_MAX_TRIM_ENTRIES && sect_left; + i++) { + tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ? + MTIP_MAX_TRIM_ENTRY_LEN : + sect_left); + buf[i].lba = __force_bit2int cpu_to_le32(tlba); + buf[i].range = __force_bit2int cpu_to_le16(tlen); + tlba += tlen; + sect_left -= tlen; + } + WARN_ON(sect_left != 0); + + /* Build the fis */ + memset(&fis, 0, sizeof(struct host_to_dev_fis)); + fis.type = 0x27; + fis.opts = 1 << 7; + fis.command = 0xfb; + fis.features = 0x60; + fis.sect_count = 1; + fis.device = ATA_DEVICE_OBS; + + if (mtip_exec_internal_command(dd->port, + &fis, + 5, + dma_addr, + ATA_SECT_SIZE, + 0, + GFP_KERNEL, + MTIP_TRIM_TIMEOUT_MS) < 0) + rv = -EIO; + + dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr); + return rv; +} + +/* * Get the drive capacity. * * @dd Pointer to the device data structure. @@ -3005,20 +3117,24 @@ static int mtip_hw_init(struct driver_data *dd) hba_setup(dd); - tasklet_init(&dd->tasklet, mtip_tasklet, (unsigned long)dd); - - dd->port = kzalloc(sizeof(struct mtip_port), GFP_KERNEL); + dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL, + dd->numa_node); if (!dd->port) { dev_err(&dd->pdev->dev, "Memory allocation: port structure\n"); return -ENOMEM; } + /* Continue workqueue setup */ + for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) + dd->work[i].port = dd->port; + /* Counting semaphore to track command slot usage */ sema_init(&dd->port->cmd_slot, num_command_slots - 1); /* Spinlock to prevent concurrent issue */ - spin_lock_init(&dd->port->cmd_issue_lock); + for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++) + spin_lock_init(&dd->port->cmd_issue_lock[i]); /* Set the port mmio base address. */ dd->port->mmio = dd->mmio + PORT_OFFSET; @@ -3165,6 +3281,7 @@ static int mtip_hw_init(struct driver_data *dd) "Unable to allocate IRQ %d\n", dd->pdev->irq); goto out2; } + irq_set_affinity_hint(dd->pdev->irq, get_cpu_mask(dd->isr_binding)); /* Enable interrupts on the HBA. */ writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, @@ -3241,7 +3358,8 @@ out3: writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, dd->mmio + HOST_CTL); - /*Release the IRQ. */ + /* Release the IRQ. */ + irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); out2: @@ -3291,11 +3409,9 @@ static int mtip_hw_exit(struct driver_data *dd) del_timer_sync(&dd->port->cmd_timer); /* Release the IRQ. */ + irq_set_affinity_hint(dd->pdev->irq, NULL); devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); - /* Stop the bottom half tasklet. */ - tasklet_kill(&dd->tasklet); - /* Free the command/command header memory. */ dmam_free_coherent(&dd->pdev->dev, HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4), @@ -3641,6 +3757,12 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio) } } + if (unlikely(bio->bi_rw & REQ_DISCARD)) { + bio_endio(bio, mtip_send_trim(dd, bio->bi_sector, + bio_sectors(bio))); + return; + } + if (unlikely(!bio_has_data(bio))) { blk_queue_flush(queue, 0); bio_endio(bio, 0); @@ -3711,7 +3833,7 @@ static int mtip_block_initialize(struct driver_data *dd) goto protocol_init_error; } - dd->disk = alloc_disk(MTIP_MAX_MINORS); + dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node); if (dd->disk == NULL) { dev_err(&dd->pdev->dev, "Unable to allocate gendisk structure\n"); @@ -3755,7 +3877,7 @@ static int mtip_block_initialize(struct driver_data *dd) skip_create_disk: /* Allocate the request queue. */ - dd->queue = blk_alloc_queue(GFP_KERNEL); + dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node); if (dd->queue == NULL) { dev_err(&dd->pdev->dev, "Unable to allocate request queue\n"); @@ -3783,6 +3905,15 @@ skip_create_disk: */ blk_queue_flush(dd->queue, 0); + /* Signal trim support */ + if (dd->trim_supp == true) { + set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags); + dd->queue->limits.discard_granularity = 4096; + blk_queue_max_discard_sectors(dd->queue, + MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES); + dd->queue->limits.discard_zeroes_data = 0; + } + /* Set the capacity of the device in 512 byte sectors. */ if (!(mtip_hw_get_capacity(dd, &capacity))) { dev_warn(&dd->pdev->dev, @@ -3813,9 +3944,8 @@ skip_create_disk: start_service_thread: sprintf(thd_name, "mtip_svc_thd_%02d", index); - - dd->mtip_svc_handler = kthread_run(mtip_service_thread, - dd, thd_name); + dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread, + dd, dd->numa_node, thd_name); if (IS_ERR(dd->mtip_svc_handler)) { dev_err(&dd->pdev->dev, "service thread failed to start\n"); @@ -3823,7 +3953,7 @@ start_service_thread: rv = -EFAULT; goto kthread_run_error; } - + wake_up_process(dd->mtip_svc_handler); if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) rv = wait_for_rebuild; @@ -3963,6 +4093,56 @@ static int mtip_block_resume(struct driver_data *dd) return 0; } +static void drop_cpu(int cpu) +{ + cpu_use[cpu]--; +} + +static int get_least_used_cpu_on_node(int node) +{ + int cpu, least_used_cpu, least_cnt; + const struct cpumask *node_mask; + + node_mask = cpumask_of_node(node); + least_used_cpu = cpumask_first(node_mask); + least_cnt = cpu_use[least_used_cpu]; + cpu = least_used_cpu; + + for_each_cpu(cpu, node_mask) { + if (cpu_use[cpu] < least_cnt) { + least_used_cpu = cpu; + least_cnt = cpu_use[cpu]; + } + } + cpu_use[least_used_cpu]++; + return least_used_cpu; +} + +/* Helper for selecting a node in round robin mode */ +static inline int mtip_get_next_rr_node(void) +{ + static int next_node = -1; + + if (next_node == -1) { + next_node = first_online_node; + return next_node; + } + + next_node = next_online_node(next_node); + if (next_node == MAX_NUMNODES) + next_node = first_online_node; + return next_node; +} + +static DEFINE_HANDLER(0); +static DEFINE_HANDLER(1); +static DEFINE_HANDLER(2); +static DEFINE_HANDLER(3); +static DEFINE_HANDLER(4); +static DEFINE_HANDLER(5); +static DEFINE_HANDLER(6); +static DEFINE_HANDLER(7); + /* * Called for each supported PCI device detected. * @@ -3977,9 +4157,25 @@ static int mtip_pci_probe(struct pci_dev *pdev, { int rv = 0; struct driver_data *dd = NULL; + char cpu_list[256]; + const struct cpumask *node_mask; + int cpu, i = 0, j = 0; + int my_node = NUMA_NO_NODE; /* Allocate memory for this devices private data. */ - dd = kzalloc(sizeof(struct driver_data), GFP_KERNEL); + my_node = pcibus_to_node(pdev->bus); + if (my_node != NUMA_NO_NODE) { + if (!node_online(my_node)) + my_node = mtip_get_next_rr_node(); + } else { + dev_info(&pdev->dev, "Kernel not reporting proximity, choosing a node\n"); + my_node = mtip_get_next_rr_node(); + } + dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n", + my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev), + cpu_to_node(smp_processor_id()), smp_processor_id()); + + dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node); if (dd == NULL) { dev_err(&pdev->dev, "Unable to allocate memory for driver data\n"); @@ -4016,19 +4212,82 @@ static int mtip_pci_probe(struct pci_dev *pdev, } } - pci_set_master(pdev); + /* Copy the info we may need later into the private data structure. */ + dd->major = mtip_major; + dd->instance = instance; + dd->pdev = pdev; + dd->numa_node = my_node; + memset(dd->workq_name, 0, 32); + snprintf(dd->workq_name, 31, "mtipq%d", dd->instance); + + dd->isr_workq = create_workqueue(dd->workq_name); + if (!dd->isr_workq) { + dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance); + goto block_initialize_err; + } + + memset(cpu_list, 0, sizeof(cpu_list)); + + node_mask = cpumask_of_node(dd->numa_node); + if (!cpumask_empty(node_mask)) { + for_each_cpu(cpu, node_mask) + { + snprintf(&cpu_list[j], 256 - j, "%d ", cpu); + j = strlen(cpu_list); + } + + dev_info(&pdev->dev, "Node %d on package %d has %d cpu(s): %s\n", + dd->numa_node, + topology_physical_package_id(cpumask_first(node_mask)), + nr_cpus_node(dd->numa_node), + cpu_list); + } else + dev_dbg(&pdev->dev, "mtip32xx: node_mask empty\n"); + + dd->isr_binding = get_least_used_cpu_on_node(dd->numa_node); + dev_info(&pdev->dev, "Initial IRQ binding node:cpu %d:%d\n", + cpu_to_node(dd->isr_binding), dd->isr_binding); + + /* first worker context always runs in ISR */ + dd->work[0].cpu_binding = dd->isr_binding; + dd->work[1].cpu_binding = get_least_used_cpu_on_node(dd->numa_node); + dd->work[2].cpu_binding = get_least_used_cpu_on_node(dd->numa_node); + dd->work[3].cpu_binding = dd->work[0].cpu_binding; + dd->work[4].cpu_binding = dd->work[1].cpu_binding; + dd->work[5].cpu_binding = dd->work[2].cpu_binding; + dd->work[6].cpu_binding = dd->work[2].cpu_binding; + dd->work[7].cpu_binding = dd->work[1].cpu_binding; + + /* Log the bindings */ + for_each_present_cpu(cpu) { + memset(cpu_list, 0, sizeof(cpu_list)); + for (i = 0, j = 0; i < MTIP_MAX_SLOT_GROUPS; i++) { + if (dd->work[i].cpu_binding == cpu) { + snprintf(&cpu_list[j], 256 - j, "%d ", i); + j = strlen(cpu_list); + } + } + if (j) + dev_info(&pdev->dev, "CPU %d: WQs %s\n", cpu, cpu_list); + } + + INIT_WORK(&dd->work[0].work, mtip_workq_sdbf0); + INIT_WORK(&dd->work[1].work, mtip_workq_sdbf1); + INIT_WORK(&dd->work[2].work, mtip_workq_sdbf2); + INIT_WORK(&dd->work[3].work, mtip_workq_sdbf3); + INIT_WORK(&dd->work[4].work, mtip_workq_sdbf4); + INIT_WORK(&dd->work[5].work, mtip_workq_sdbf5); + INIT_WORK(&dd->work[6].work, mtip_workq_sdbf6); + INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7); + + pci_set_master(pdev); if (pci_enable_msi(pdev)) { dev_warn(&pdev->dev, "Unable to enable MSI interrupt.\n"); goto block_initialize_err; } - /* Copy the info we may need later into the private data structure. */ - dd->major = mtip_major; - dd->instance = instance; - dd->pdev = pdev; - /* Initialize the block layer. */ rv = mtip_block_initialize(dd); if (rv < 0) { @@ -4048,7 +4307,13 @@ static int mtip_pci_probe(struct pci_dev *pdev, block_initialize_err: pci_disable_msi(pdev); - + if (dd->isr_workq) { + flush_workqueue(dd->isr_workq); + destroy_workqueue(dd->isr_workq); + drop_cpu(dd->work[0].cpu_binding); + drop_cpu(dd->work[1].cpu_binding); + drop_cpu(dd->work[2].cpu_binding); + } setmask_err: pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); @@ -4089,6 +4354,14 @@ static void mtip_pci_remove(struct pci_dev *pdev) /* Clean up the block layer. */ mtip_block_remove(dd); + if (dd->isr_workq) { + flush_workqueue(dd->isr_workq); + destroy_workqueue(dd->isr_workq); + drop_cpu(dd->work[0].cpu_binding); + drop_cpu(dd->work[1].cpu_binding); + drop_cpu(dd->work[2].cpu_binding); + } + pci_disable_msi(pdev); kfree(dd); diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index b1742640556..3bffff5f670 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -164,6 +164,35 @@ struct smart_attr { u8 res[3]; } __packed; +struct mtip_work { + struct work_struct work; + void *port; + int cpu_binding; + u32 completed; +} ____cacheline_aligned_in_smp; + +#define DEFINE_HANDLER(group) \ + void mtip_workq_sdbf##group(struct work_struct *work) \ + { \ + struct mtip_work *w = (struct mtip_work *) work; \ + mtip_workq_sdbfx(w->port, group, w->completed); \ + } + +#define MTIP_TRIM_TIMEOUT_MS 240000 +#define MTIP_MAX_TRIM_ENTRIES 8 +#define MTIP_MAX_TRIM_ENTRY_LEN 0xfff8 + +struct mtip_trim_entry { + u32 lba; /* starting lba of region */ + u16 rsvd; /* unused */ + u16 range; /* # of 512b blocks to trim */ +} __packed; + +struct mtip_trim { + /* Array of regions to trim */ + struct mtip_trim_entry entry[MTIP_MAX_TRIM_ENTRIES]; +} __packed; + /* Register Frame Information Structure (FIS), host to device. */ struct host_to_dev_fis { /* @@ -424,7 +453,7 @@ struct mtip_port { */ struct semaphore cmd_slot; /* Spinlock for working around command-issue bug. */ - spinlock_t cmd_issue_lock; + spinlock_t cmd_issue_lock[MTIP_MAX_SLOT_GROUPS]; }; /* @@ -447,9 +476,6 @@ struct driver_data { struct mtip_port *port; /* Pointer to the port data structure. */ - /* Tasklet used to process the bottom half of the ISR. */ - struct tasklet_struct tasklet; - unsigned product_type; /* magic value declaring the product type */ unsigned slot_groups; /* number of slot groups the product supports */ @@ -461,6 +487,20 @@ struct driver_data { struct task_struct *mtip_svc_handler; /* task_struct of svc thd */ struct dentry *dfs_node; + + bool trim_supp; /* flag indicating trim support */ + + int numa_node; /* NUMA support */ + + char workq_name[32]; + + struct workqueue_struct *isr_workq; + + struct mtip_work work[MTIP_MAX_SLOT_GROUPS]; + + atomic_t irq_workers_active; + + int isr_binding; }; #endif diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 89576a0b3f2..6c81a4c040b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -52,9 +52,12 @@ #define SECTOR_SHIFT 9 #define SECTOR_SIZE (1ULL << SECTOR_SHIFT) -/* It might be useful to have this defined elsewhere too */ +/* It might be useful to have these defined elsewhere */ -#define U64_MAX ((u64) (~0ULL)) +#define U8_MAX ((u8) (~0U)) +#define U16_MAX ((u16) (~0U)) +#define U32_MAX ((u32) (~0U)) +#define U64_MAX ((u64) (~0ULL)) #define RBD_DRV_NAME "rbd" #define RBD_DRV_NAME_LONG "rbd (rados block device)" @@ -66,7 +69,6 @@ (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) #define RBD_MAX_SNAP_COUNT 510 /* allows max snapc to fit in 4KB */ -#define RBD_MAX_OPT_LEN 1024 #define RBD_SNAP_HEAD_NAME "-" @@ -93,8 +95,6 @@ #define DEV_NAME_LEN 32 #define MAX_INT_FORMAT_WIDTH ((5 * sizeof (int)) / 2 + 1) -#define RBD_READ_ONLY_DEFAULT false - /* * block device image metadata (in-memory version) */ @@ -119,16 +119,33 @@ struct rbd_image_header { * An rbd image specification. * * The tuple (pool_id, image_id, snap_id) is sufficient to uniquely - * identify an image. + * identify an image. Each rbd_dev structure includes a pointer to + * an rbd_spec structure that encapsulates this identity. + * + * Each of the id's in an rbd_spec has an associated name. For a + * user-mapped image, the names are supplied and the id's associated + * with them are looked up. For a layered image, a parent image is + * defined by the tuple, and the names are looked up. + * + * An rbd_dev structure contains a parent_spec pointer which is + * non-null if the image it represents is a child in a layered + * image. This pointer will refer to the rbd_spec structure used + * by the parent rbd_dev for its own identity (i.e., the structure + * is shared between the parent and child). + * + * Since these structures are populated once, during the discovery + * phase of image construction, they are effectively immutable so + * we make no effort to synchronize access to them. + * + * Note that code herein does not assume the image name is known (it + * could be a null pointer). */ struct rbd_spec { u64 pool_id; char *pool_name; char *image_id; - size_t image_id_len; char *image_name; - size_t image_name_len; u64 snap_id; char *snap_name; @@ -136,10 +153,6 @@ struct rbd_spec { struct kref kref; }; -struct rbd_options { - bool read_only; -}; - /* * an instance of the client. multiple devices may share an rbd client. */ @@ -149,37 +162,76 @@ struct rbd_client { struct list_head node; }; -/* - * a request completion status - */ -struct rbd_req_status { - int done; - int rc; - u64 bytes; +struct rbd_img_request; +typedef void (*rbd_img_callback_t)(struct rbd_img_request *); + +#define BAD_WHICH U32_MAX /* Good which or bad which, which? */ + +struct rbd_obj_request; +typedef void (*rbd_obj_callback_t)(struct rbd_obj_request *); + +enum obj_request_type { + OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES }; -/* - * a collection of requests - */ -struct rbd_req_coll { - int total; - int num_done; +struct rbd_obj_request { + const char *object_name; + u64 offset; /* object start byte */ + u64 length; /* bytes from offset */ + + struct rbd_img_request *img_request; + struct list_head links; /* img_request->obj_requests */ + u32 which; /* posn image request list */ + + enum obj_request_type type; + union { + struct bio *bio_list; + struct { + struct page **pages; + u32 page_count; + }; + }; + + struct ceph_osd_request *osd_req; + + u64 xferred; /* bytes transferred */ + u64 version; + int result; + atomic_t done; + + rbd_obj_callback_t callback; + struct completion completion; + struct kref kref; - struct rbd_req_status status[0]; }; -/* - * a single io request - */ -struct rbd_request { - struct request *rq; /* blk layer request */ - struct bio *bio; /* cloned bio */ - struct page **pages; /* list of used pages */ - u64 len; - int coll_index; - struct rbd_req_coll *coll; +struct rbd_img_request { + struct request *rq; + struct rbd_device *rbd_dev; + u64 offset; /* starting image byte offset */ + u64 length; /* byte count from offset */ + bool write_request; /* false for read */ + union { + struct ceph_snap_context *snapc; /* for writes */ + u64 snap_id; /* for reads */ + }; + spinlock_t completion_lock;/* protects next_completion */ + u32 next_completion; + rbd_img_callback_t callback; + + u32 obj_request_count; + struct list_head obj_requests; /* rbd_obj_request structs */ + + struct kref kref; }; +#define for_each_obj_request(ireq, oreq) \ + list_for_each_entry(oreq, &(ireq)->obj_requests, links) +#define for_each_obj_request_from(ireq, oreq) \ + list_for_each_entry_from(oreq, &(ireq)->obj_requests, links) +#define for_each_obj_request_safe(ireq, oreq, n) \ + list_for_each_entry_safe_reverse(oreq, n, &(ireq)->obj_requests, links) + struct rbd_snap { struct device dev; const char *name; @@ -209,16 +261,18 @@ struct rbd_device { char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ - spinlock_t lock; /* queue lock */ + spinlock_t lock; /* queue, flags, open_count */ struct rbd_image_header header; - bool exists; + unsigned long flags; /* possibly lock protected */ struct rbd_spec *spec; char *header_name; + struct ceph_file_layout layout; + struct ceph_osd_event *watch_event; - struct ceph_osd_request *watch_request; + struct rbd_obj_request *watch_request; struct rbd_spec *parent_spec; u64 parent_overlap; @@ -235,7 +289,19 @@ struct rbd_device { /* sysfs related */ struct device dev; - unsigned long open_count; + unsigned long open_count; /* protected by lock */ +}; + +/* + * Flag bits for rbd_dev->flags. If atomicity is required, + * rbd_dev->lock is used to protect access. + * + * Currently, only the "removing" flag (which is coupled with the + * "open_count" field) requires atomic access. + */ +enum rbd_dev_flags { + RBD_DEV_FLAG_EXISTS, /* mapped snapshot has not been deleted */ + RBD_DEV_FLAG_REMOVING, /* this mapping is being removed */ }; static DEFINE_MUTEX(ctl_mutex); /* Serialize open/close/setup/teardown */ @@ -277,6 +343,33 @@ static struct device rbd_root_dev = { .release = rbd_root_dev_release, }; +static __printf(2, 3) +void rbd_warn(struct rbd_device *rbd_dev, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + if (!rbd_dev) + printk(KERN_WARNING "%s: %pV\n", RBD_DRV_NAME, &vaf); + else if (rbd_dev->disk) + printk(KERN_WARNING "%s: %s: %pV\n", + RBD_DRV_NAME, rbd_dev->disk->disk_name, &vaf); + else if (rbd_dev->spec && rbd_dev->spec->image_name) + printk(KERN_WARNING "%s: image %s: %pV\n", + RBD_DRV_NAME, rbd_dev->spec->image_name, &vaf); + else if (rbd_dev->spec && rbd_dev->spec->image_id) + printk(KERN_WARNING "%s: id %s: %pV\n", + RBD_DRV_NAME, rbd_dev->spec->image_id, &vaf); + else /* punt */ + printk(KERN_WARNING "%s: rbd_dev %p: %pV\n", + RBD_DRV_NAME, rbd_dev, &vaf); + va_end(args); +} + #ifdef RBD_DEBUG #define rbd_assert(expr) \ if (unlikely(!(expr))) { \ @@ -296,14 +389,23 @@ static int rbd_dev_v2_refresh(struct rbd_device *rbd_dev, u64 *hver); static int rbd_open(struct block_device *bdev, fmode_t mode) { struct rbd_device *rbd_dev = bdev->bd_disk->private_data; + bool removing = false; if ((mode & FMODE_WRITE) && rbd_dev->mapping.read_only) return -EROFS; + spin_lock_irq(&rbd_dev->lock); + if (test_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags)) + removing = true; + else + rbd_dev->open_count++; + spin_unlock_irq(&rbd_dev->lock); + if (removing) + return -ENOENT; + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); (void) get_device(&rbd_dev->dev); set_device_ro(bdev, rbd_dev->mapping.read_only); - rbd_dev->open_count++; mutex_unlock(&ctl_mutex); return 0; @@ -312,10 +414,14 @@ static int rbd_open(struct block_device *bdev, fmode_t mode) static int rbd_release(struct gendisk *disk, fmode_t mode) { struct rbd_device *rbd_dev = disk->private_data; + unsigned long open_count_before; + + spin_lock_irq(&rbd_dev->lock); + open_count_before = rbd_dev->open_count--; + spin_unlock_irq(&rbd_dev->lock); + rbd_assert(open_count_before > 0); mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); - rbd_assert(rbd_dev->open_count > 0); - rbd_dev->open_count--; put_device(&rbd_dev->dev); mutex_unlock(&ctl_mutex); @@ -337,7 +443,7 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) struct rbd_client *rbdc; int ret = -ENOMEM; - dout("rbd_client_create\n"); + dout("%s:\n", __func__); rbdc = kmalloc(sizeof(struct rbd_client), GFP_KERNEL); if (!rbdc) goto out_opt; @@ -361,8 +467,8 @@ static struct rbd_client *rbd_client_create(struct ceph_options *ceph_opts) spin_unlock(&rbd_client_list_lock); mutex_unlock(&ctl_mutex); + dout("%s: rbdc %p\n", __func__, rbdc); - dout("rbd_client_create created %p\n", rbdc); return rbdc; out_err: @@ -373,6 +479,8 @@ out_mutex: out_opt: if (ceph_opts) ceph_destroy_options(ceph_opts); + dout("%s: error %d\n", __func__, ret); + return ERR_PTR(ret); } @@ -426,6 +534,12 @@ static match_table_t rbd_opts_tokens = { {-1, NULL} }; +struct rbd_options { + bool read_only; +}; + +#define RBD_READ_ONLY_DEFAULT false + static int parse_rbd_opts_token(char *c, void *private) { struct rbd_options *rbd_opts = private; @@ -493,7 +607,7 @@ static void rbd_client_release(struct kref *kref) { struct rbd_client *rbdc = container_of(kref, struct rbd_client, kref); - dout("rbd_release_client %p\n", rbdc); + dout("%s: rbdc %p\n", __func__, rbdc); spin_lock(&rbd_client_list_lock); list_del(&rbdc->node); spin_unlock(&rbd_client_list_lock); @@ -512,18 +626,6 @@ static void rbd_put_client(struct rbd_client *rbdc) kref_put(&rbdc->kref, rbd_client_release); } -/* - * Destroy requests collection - */ -static void rbd_coll_release(struct kref *kref) -{ - struct rbd_req_coll *coll = - container_of(kref, struct rbd_req_coll, kref); - - dout("rbd_coll_release %p\n", coll); - kfree(coll); -} - static bool rbd_image_format_valid(u32 image_format) { return image_format == 1 || image_format == 2; @@ -707,7 +809,8 @@ static int rbd_dev_set_mapping(struct rbd_device *rbd_dev) goto done; rbd_dev->mapping.read_only = true; } - rbd_dev->exists = true; + set_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); + done: return ret; } @@ -724,7 +827,7 @@ static void rbd_header_free(struct rbd_image_header *header) header->snapc = NULL; } -static char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) +static const char *rbd_segment_name(struct rbd_device *rbd_dev, u64 offset) { char *name; u64 segment; @@ -767,23 +870,6 @@ static u64 rbd_segment_length(struct rbd_device *rbd_dev, return length; } -static int rbd_get_num_segments(struct rbd_image_header *header, - u64 ofs, u64 len) -{ - u64 start_seg; - u64 end_seg; - - if (!len) - return 0; - if (len - 1 > U64_MAX - ofs) - return -ERANGE; - - start_seg = ofs >> header->obj_order; - end_seg = (ofs + len - 1) >> header->obj_order; - - return end_seg - start_seg + 1; -} - /* * returns the size of an object in the image */ @@ -949,8 +1035,10 @@ static struct bio *bio_chain_clone_range(struct bio **bio_src, unsigned int bi_size; struct bio *bio; - if (!bi) + if (!bi) { + rbd_warn(NULL, "bio_chain exhausted with %u left", len); goto out_err; /* EINVAL; ran out of bio's */ + } bi_size = min_t(unsigned int, bi->bi_size - off, len); bio = bio_clone_range(bi, off, bi_size, gfpmask); if (!bio) @@ -976,399 +1064,721 @@ out_err: return NULL; } -/* - * helpers for osd request op vectors. - */ -static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, - int opcode, u32 payload_len) +static void rbd_obj_request_get(struct rbd_obj_request *obj_request) { - struct ceph_osd_req_op *ops; + dout("%s: obj %p (was %d)\n", __func__, obj_request, + atomic_read(&obj_request->kref.refcount)); + kref_get(&obj_request->kref); +} + +static void rbd_obj_request_destroy(struct kref *kref); +static void rbd_obj_request_put(struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request != NULL); + dout("%s: obj %p (was %d)\n", __func__, obj_request, + atomic_read(&obj_request->kref.refcount)); + kref_put(&obj_request->kref, rbd_obj_request_destroy); +} + +static void rbd_img_request_get(struct rbd_img_request *img_request) +{ + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_get(&img_request->kref); +} + +static void rbd_img_request_destroy(struct kref *kref); +static void rbd_img_request_put(struct rbd_img_request *img_request) +{ + rbd_assert(img_request != NULL); + dout("%s: img %p (was %d)\n", __func__, img_request, + atomic_read(&img_request->kref.refcount)); + kref_put(&img_request->kref, rbd_img_request_destroy); +} + +static inline void rbd_img_obj_request_add(struct rbd_img_request *img_request, + struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request->img_request == NULL); + + rbd_obj_request_get(obj_request); + obj_request->img_request = img_request; + obj_request->which = img_request->obj_request_count; + rbd_assert(obj_request->which != BAD_WHICH); + img_request->obj_request_count++; + list_add_tail(&obj_request->links, &img_request->obj_requests); + dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, + obj_request->which); +} - ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); - if (!ops) +static inline void rbd_img_obj_request_del(struct rbd_img_request *img_request, + struct rbd_obj_request *obj_request) +{ + rbd_assert(obj_request->which != BAD_WHICH); + + dout("%s: img %p obj %p w=%u\n", __func__, img_request, obj_request, + obj_request->which); + list_del(&obj_request->links); + rbd_assert(img_request->obj_request_count > 0); + img_request->obj_request_count--; + rbd_assert(obj_request->which == img_request->obj_request_count); + obj_request->which = BAD_WHICH; + rbd_assert(obj_request->img_request == img_request); + obj_request->img_request = NULL; + obj_request->callback = NULL; + rbd_obj_request_put(obj_request); +} + +static bool obj_request_type_valid(enum obj_request_type type) +{ + switch (type) { + case OBJ_REQUEST_NODATA: + case OBJ_REQUEST_BIO: + case OBJ_REQUEST_PAGES: + return true; + default: + return false; + } +} + +static struct ceph_osd_req_op *rbd_osd_req_op_create(u16 opcode, ...) +{ + struct ceph_osd_req_op *op; + va_list args; + size_t size; + + op = kzalloc(sizeof (*op), GFP_NOIO); + if (!op) return NULL; + op->op = opcode; + va_start(args, opcode); + switch (opcode) { + case CEPH_OSD_OP_READ: + case CEPH_OSD_OP_WRITE: + /* rbd_osd_req_op_create(READ, offset, length) */ + /* rbd_osd_req_op_create(WRITE, offset, length) */ + op->extent.offset = va_arg(args, u64); + op->extent.length = va_arg(args, u64); + if (opcode == CEPH_OSD_OP_WRITE) + op->payload_len = op->extent.length; + break; + case CEPH_OSD_OP_STAT: + break; + case CEPH_OSD_OP_CALL: + /* rbd_osd_req_op_create(CALL, class, method, data, datalen) */ + op->cls.class_name = va_arg(args, char *); + size = strlen(op->cls.class_name); + rbd_assert(size <= (size_t) U8_MAX); + op->cls.class_len = size; + op->payload_len = size; + + op->cls.method_name = va_arg(args, char *); + size = strlen(op->cls.method_name); + rbd_assert(size <= (size_t) U8_MAX); + op->cls.method_len = size; + op->payload_len += size; + + op->cls.argc = 0; + op->cls.indata = va_arg(args, void *); + size = va_arg(args, size_t); + rbd_assert(size <= (size_t) U32_MAX); + op->cls.indata_len = (u32) size; + op->payload_len += size; + break; + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_WATCH: + /* rbd_osd_req_op_create(NOTIFY_ACK, cookie, version) */ + /* rbd_osd_req_op_create(WATCH, cookie, version, flag) */ + op->watch.cookie = va_arg(args, u64); + op->watch.ver = va_arg(args, u64); + op->watch.ver = cpu_to_le64(op->watch.ver); + if (opcode == CEPH_OSD_OP_WATCH && va_arg(args, int)) + op->watch.flag = (u8) 1; + break; + default: + rbd_warn(NULL, "unsupported opcode %hu\n", opcode); + kfree(op); + op = NULL; + break; + } + va_end(args); - ops[0].op = opcode; + return op; +} - /* - * op extent offset and length will be set later on - * in calc_raw_layout() - */ - ops[0].payload_len = payload_len; +static void rbd_osd_req_op_destroy(struct ceph_osd_req_op *op) +{ + kfree(op); +} + +static int rbd_obj_request_submit(struct ceph_osd_client *osdc, + struct rbd_obj_request *obj_request) +{ + dout("%s: osdc %p obj %p\n", __func__, osdc, obj_request); - return ops; + return ceph_osdc_start_request(osdc, obj_request->osd_req, false); } -static void rbd_destroy_ops(struct ceph_osd_req_op *ops) +static void rbd_img_request_complete(struct rbd_img_request *img_request) { - kfree(ops); + dout("%s: img %p\n", __func__, img_request); + if (img_request->callback) + img_request->callback(img_request); + else + rbd_img_request_put(img_request); } -static void rbd_coll_end_req_index(struct request *rq, - struct rbd_req_coll *coll, - int index, - int ret, u64 len) +/* Caller is responsible for rbd_obj_request_destroy(obj_request) */ + +static int rbd_obj_request_wait(struct rbd_obj_request *obj_request) { - struct request_queue *q; - int min, max, i; + dout("%s: obj %p\n", __func__, obj_request); - dout("rbd_coll_end_req_index %p index %d ret %d len %llu\n", - coll, index, ret, (unsigned long long) len); + return wait_for_completion_interruptible(&obj_request->completion); +} - if (!rq) - return; +static void obj_request_done_init(struct rbd_obj_request *obj_request) +{ + atomic_set(&obj_request->done, 0); + smp_wmb(); +} - if (!coll) { - blk_end_request(rq, ret, len); - return; +static void obj_request_done_set(struct rbd_obj_request *obj_request) +{ + int done; + + done = atomic_inc_return(&obj_request->done); + if (done > 1) { + struct rbd_img_request *img_request = obj_request->img_request; + struct rbd_device *rbd_dev; + + rbd_dev = img_request ? img_request->rbd_dev : NULL; + rbd_warn(rbd_dev, "obj_request %p was already done\n", + obj_request); } +} - q = rq->q; - - spin_lock_irq(q->queue_lock); - coll->status[index].done = 1; - coll->status[index].rc = ret; - coll->status[index].bytes = len; - max = min = coll->num_done; - while (max < coll->total && coll->status[max].done) - max++; - - for (i = min; i<max; i++) { - __blk_end_request(rq, coll->status[i].rc, - coll->status[i].bytes); - coll->num_done++; - kref_put(&coll->kref, rbd_coll_release); +static bool obj_request_done_test(struct rbd_obj_request *obj_request) +{ + smp_mb(); + return atomic_read(&obj_request->done) != 0; +} + +static void rbd_obj_request_complete(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p cb %p\n", __func__, obj_request, + obj_request->callback); + if (obj_request->callback) + obj_request->callback(obj_request); + else + complete_all(&obj_request->completion); +} + +static void rbd_osd_trivial_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + obj_request_done_set(obj_request); +} + +static void rbd_osd_read_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p result %d %llu/%llu\n", __func__, obj_request, + obj_request->result, obj_request->xferred, obj_request->length); + /* + * ENOENT means a hole in the object. We zero-fill the + * entire length of the request. A short read also implies + * zero-fill to the end of the request. Either way we + * update the xferred count to indicate the whole request + * was satisfied. + */ + if (obj_request->result == -ENOENT) { + zero_bio_chain(obj_request->bio_list, 0); + obj_request->result = 0; + obj_request->xferred = obj_request->length; + } else if (obj_request->xferred < obj_request->length && + !obj_request->result) { + zero_bio_chain(obj_request->bio_list, obj_request->xferred); + obj_request->xferred = obj_request->length; } - spin_unlock_irq(q->queue_lock); + obj_request_done_set(obj_request); } -static void rbd_coll_end_req(struct rbd_request *req, - int ret, u64 len) +static void rbd_osd_write_callback(struct rbd_obj_request *obj_request) { - rbd_coll_end_req_index(req->rq, req->coll, req->coll_index, ret, len); + dout("%s: obj %p result %d %llu\n", __func__, obj_request, + obj_request->result, obj_request->length); + /* + * There is no such thing as a successful short write. + * Our xferred value is the number of bytes transferred + * back. Set it to our originally-requested length. + */ + obj_request->xferred = obj_request->length; + obj_request_done_set(obj_request); } /* - * Send ceph osd request + * For a simple stat call there's nothing to do. We'll do more if + * this is part of a write sequence for a layered image. */ -static int rbd_do_request(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 snapid, - const char *object_name, u64 ofs, u64 len, - struct bio *bio, - struct page **pages, - int num_pages, - int flags, - struct ceph_osd_req_op *ops, - struct rbd_req_coll *coll, - int coll_index, - void (*rbd_cb)(struct ceph_osd_request *req, - struct ceph_msg *msg), - struct ceph_osd_request **linger_req, - u64 *ver) -{ - struct ceph_osd_request *req; - struct ceph_file_layout *layout; - int ret; - u64 bno; - struct timespec mtime = CURRENT_TIME; - struct rbd_request *req_data; - struct ceph_osd_request_head *reqhead; - struct ceph_osd_client *osdc; +static void rbd_osd_stat_callback(struct rbd_obj_request *obj_request) +{ + dout("%s: obj %p\n", __func__, obj_request); + obj_request_done_set(obj_request); +} - req_data = kzalloc(sizeof(*req_data), GFP_NOIO); - if (!req_data) { - if (coll) - rbd_coll_end_req_index(rq, coll, coll_index, - -ENOMEM, len); - return -ENOMEM; +static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, + struct ceph_msg *msg) +{ + struct rbd_obj_request *obj_request = osd_req->r_priv; + u16 opcode; + + dout("%s: osd_req %p msg %p\n", __func__, osd_req, msg); + rbd_assert(osd_req == obj_request->osd_req); + rbd_assert(!!obj_request->img_request ^ + (obj_request->which == BAD_WHICH)); + + if (osd_req->r_result < 0) + obj_request->result = osd_req->r_result; + obj_request->version = le64_to_cpu(osd_req->r_reassert_version.version); + + WARN_ON(osd_req->r_num_ops != 1); /* For now */ + + /* + * We support a 64-bit length, but ultimately it has to be + * passed to blk_end_request(), which takes an unsigned int. + */ + obj_request->xferred = osd_req->r_reply_op_len[0]; + rbd_assert(obj_request->xferred < (u64) UINT_MAX); + opcode = osd_req->r_request_ops[0].op; + switch (opcode) { + case CEPH_OSD_OP_READ: + rbd_osd_read_callback(obj_request); + break; + case CEPH_OSD_OP_WRITE: + rbd_osd_write_callback(obj_request); + break; + case CEPH_OSD_OP_STAT: + rbd_osd_stat_callback(obj_request); + break; + case CEPH_OSD_OP_CALL: + case CEPH_OSD_OP_NOTIFY_ACK: + case CEPH_OSD_OP_WATCH: + rbd_osd_trivial_callback(obj_request); + break; + default: + rbd_warn(NULL, "%s: unsupported op %hu\n", + obj_request->object_name, (unsigned short) opcode); + break; } - if (coll) { - req_data->coll = coll; - req_data->coll_index = coll_index; + if (obj_request_done_test(obj_request)) + rbd_obj_request_complete(obj_request); +} + +static struct ceph_osd_request *rbd_osd_req_create( + struct rbd_device *rbd_dev, + bool write_request, + struct rbd_obj_request *obj_request, + struct ceph_osd_req_op *op) +{ + struct rbd_img_request *img_request = obj_request->img_request; + struct ceph_snap_context *snapc = NULL; + struct ceph_osd_client *osdc; + struct ceph_osd_request *osd_req; + struct timespec now; + struct timespec *mtime; + u64 snap_id = CEPH_NOSNAP; + u64 offset = obj_request->offset; + u64 length = obj_request->length; + + if (img_request) { + rbd_assert(img_request->write_request == write_request); + if (img_request->write_request) + snapc = img_request->snapc; + else + snap_id = img_request->snap_id; } - dout("rbd_do_request object_name=%s ofs=%llu len=%llu coll=%p[%d]\n", - object_name, (unsigned long long) ofs, - (unsigned long long) len, coll, coll_index); + /* Allocate and initialize the request, for the single op */ osdc = &rbd_dev->rbd_client->client->osdc; - req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, - false, GFP_NOIO, pages, bio); - if (!req) { - ret = -ENOMEM; - goto done_pages; + osd_req = ceph_osdc_alloc_request(osdc, snapc, 1, false, GFP_ATOMIC); + if (!osd_req) + return NULL; /* ENOMEM */ + + rbd_assert(obj_request_type_valid(obj_request->type)); + switch (obj_request->type) { + case OBJ_REQUEST_NODATA: + break; /* Nothing to do */ + case OBJ_REQUEST_BIO: + rbd_assert(obj_request->bio_list != NULL); + osd_req->r_bio = obj_request->bio_list; + break; + case OBJ_REQUEST_PAGES: + osd_req->r_pages = obj_request->pages; + osd_req->r_num_pages = obj_request->page_count; + osd_req->r_page_alignment = offset & ~PAGE_MASK; + break; } - req->r_callback = rbd_cb; + if (write_request) { + osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK; + now = CURRENT_TIME; + mtime = &now; + } else { + osd_req->r_flags = CEPH_OSD_FLAG_READ; + mtime = NULL; /* not needed for reads */ + offset = 0; /* These are not used... */ + length = 0; /* ...for osd read requests */ + } - req_data->rq = rq; - req_data->bio = bio; - req_data->pages = pages; - req_data->len = len; + osd_req->r_callback = rbd_osd_req_callback; + osd_req->r_priv = obj_request; - req->r_priv = req_data; + osd_req->r_oid_len = strlen(obj_request->object_name); + rbd_assert(osd_req->r_oid_len < sizeof (osd_req->r_oid)); + memcpy(osd_req->r_oid, obj_request->object_name, osd_req->r_oid_len); - reqhead = req->r_request->front.iov_base; - reqhead->snapid = cpu_to_le64(CEPH_NOSNAP); + osd_req->r_file_layout = rbd_dev->layout; /* struct */ - strncpy(req->r_oid, object_name, sizeof(req->r_oid)); - req->r_oid_len = strlen(req->r_oid); + /* osd_req will get its own reference to snapc (if non-null) */ - layout = &req->r_file_layout; - memset(layout, 0, sizeof(*layout)); - layout->fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_stripe_count = cpu_to_le32(1); - layout->fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); - layout->fl_pg_pool = cpu_to_le32((int) rbd_dev->spec->pool_id); - ret = ceph_calc_raw_layout(osdc, layout, snapid, ofs, &len, &bno, - req, ops); - rbd_assert(ret == 0); + ceph_osdc_build_request(osd_req, offset, length, 1, op, + snapc, snap_id, mtime); - ceph_osdc_build_request(req, ofs, &len, - ops, - snapc, - &mtime, - req->r_oid, req->r_oid_len); + return osd_req; +} - if (linger_req) { - ceph_osdc_set_request_linger(osdc, req); - *linger_req = req; - } +static void rbd_osd_req_destroy(struct ceph_osd_request *osd_req) +{ + ceph_osdc_put_request(osd_req); +} - ret = ceph_osdc_start_request(osdc, req, false); - if (ret < 0) - goto done_err; - - if (!rbd_cb) { - ret = ceph_osdc_wait_request(osdc, req); - if (ver) - *ver = le64_to_cpu(req->r_reassert_version.version); - dout("reassert_ver=%llu\n", - (unsigned long long) - le64_to_cpu(req->r_reassert_version.version)); - ceph_osdc_put_request(req); +/* object_name is assumed to be a non-null pointer and NUL-terminated */ + +static struct rbd_obj_request *rbd_obj_request_create(const char *object_name, + u64 offset, u64 length, + enum obj_request_type type) +{ + struct rbd_obj_request *obj_request; + size_t size; + char *name; + + rbd_assert(obj_request_type_valid(type)); + + size = strlen(object_name) + 1; + obj_request = kzalloc(sizeof (*obj_request) + size, GFP_KERNEL); + if (!obj_request) + return NULL; + + name = (char *)(obj_request + 1); + obj_request->object_name = memcpy(name, object_name, size); + obj_request->offset = offset; + obj_request->length = length; + obj_request->which = BAD_WHICH; + obj_request->type = type; + INIT_LIST_HEAD(&obj_request->links); + obj_request_done_init(obj_request); + init_completion(&obj_request->completion); + kref_init(&obj_request->kref); + + dout("%s: \"%s\" %llu/%llu %d -> obj %p\n", __func__, object_name, + offset, length, (int)type, obj_request); + + return obj_request; +} + +static void rbd_obj_request_destroy(struct kref *kref) +{ + struct rbd_obj_request *obj_request; + + obj_request = container_of(kref, struct rbd_obj_request, kref); + + dout("%s: obj %p\n", __func__, obj_request); + + rbd_assert(obj_request->img_request == NULL); + rbd_assert(obj_request->which == BAD_WHICH); + + if (obj_request->osd_req) + rbd_osd_req_destroy(obj_request->osd_req); + + rbd_assert(obj_request_type_valid(obj_request->type)); + switch (obj_request->type) { + case OBJ_REQUEST_NODATA: + break; /* Nothing to do */ + case OBJ_REQUEST_BIO: + if (obj_request->bio_list) + bio_chain_put(obj_request->bio_list); + break; + case OBJ_REQUEST_PAGES: + if (obj_request->pages) + ceph_release_page_vector(obj_request->pages, + obj_request->page_count); + break; } - return ret; -done_err: - bio_chain_put(req_data->bio); - ceph_osdc_put_request(req); -done_pages: - rbd_coll_end_req(req_data, ret, len); - kfree(req_data); - return ret; + kfree(obj_request); } /* - * Ceph osd op callback + * Caller is responsible for filling in the list of object requests + * that comprises the image request, and the Linux request pointer + * (if there is one). */ -static void rbd_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) -{ - struct rbd_request *req_data = req->r_priv; - struct ceph_osd_reply_head *replyhead; - struct ceph_osd_op *op; - __s32 rc; - u64 bytes; - int read_op; - - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - op = (void *)(replyhead + 1); - rc = le32_to_cpu(replyhead->result); - bytes = le64_to_cpu(op->extent.length); - read_op = (le16_to_cpu(op->op) == CEPH_OSD_OP_READ); - - dout("rbd_req_cb bytes=%llu readop=%d rc=%d\n", - (unsigned long long) bytes, read_op, (int) rc); - - if (rc == -ENOENT && read_op) { - zero_bio_chain(req_data->bio, 0); - rc = 0; - } else if (rc == 0 && read_op && bytes < req_data->len) { - zero_bio_chain(req_data->bio, bytes); - bytes = req_data->len; - } +static struct rbd_img_request *rbd_img_request_create( + struct rbd_device *rbd_dev, + u64 offset, u64 length, + bool write_request) +{ + struct rbd_img_request *img_request; + struct ceph_snap_context *snapc = NULL; - rbd_coll_end_req(req_data, rc, bytes); + img_request = kmalloc(sizeof (*img_request), GFP_ATOMIC); + if (!img_request) + return NULL; - if (req_data->bio) - bio_chain_put(req_data->bio); + if (write_request) { + down_read(&rbd_dev->header_rwsem); + snapc = ceph_get_snap_context(rbd_dev->header.snapc); + up_read(&rbd_dev->header_rwsem); + if (WARN_ON(!snapc)) { + kfree(img_request); + return NULL; /* Shouldn't happen */ + } + } - ceph_osdc_put_request(req); - kfree(req_data); + img_request->rq = NULL; + img_request->rbd_dev = rbd_dev; + img_request->offset = offset; + img_request->length = length; + img_request->write_request = write_request; + if (write_request) + img_request->snapc = snapc; + else + img_request->snap_id = rbd_dev->spec->snap_id; + spin_lock_init(&img_request->completion_lock); + img_request->next_completion = 0; + img_request->callback = NULL; + img_request->obj_request_count = 0; + INIT_LIST_HEAD(&img_request->obj_requests); + kref_init(&img_request->kref); + + rbd_img_request_get(img_request); /* Avoid a warning */ + rbd_img_request_put(img_request); /* TEMPORARY */ + + dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev, + write_request ? "write" : "read", offset, length, + img_request); + + return img_request; } -static void rbd_simple_req_cb(struct ceph_osd_request *req, struct ceph_msg *msg) +static void rbd_img_request_destroy(struct kref *kref) { - ceph_osdc_put_request(req); + struct rbd_img_request *img_request; + struct rbd_obj_request *obj_request; + struct rbd_obj_request *next_obj_request; + + img_request = container_of(kref, struct rbd_img_request, kref); + + dout("%s: img %p\n", __func__, img_request); + + for_each_obj_request_safe(img_request, obj_request, next_obj_request) + rbd_img_obj_request_del(img_request, obj_request); + rbd_assert(img_request->obj_request_count == 0); + + if (img_request->write_request) + ceph_put_snap_context(img_request->snapc); + + kfree(img_request); } -/* - * Do a synchronous ceph osd operation - */ -static int rbd_req_sync_op(struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 snapid, - int flags, - struct ceph_osd_req_op *ops, - const char *object_name, - u64 ofs, u64 inbound_size, - char *inbound, - struct ceph_osd_request **linger_req, - u64 *ver) +static int rbd_img_request_fill_bio(struct rbd_img_request *img_request, + struct bio *bio_list) { - int ret; - struct page **pages; - int num_pages; - - rbd_assert(ops != NULL); + struct rbd_device *rbd_dev = img_request->rbd_dev; + struct rbd_obj_request *obj_request = NULL; + struct rbd_obj_request *next_obj_request; + unsigned int bio_offset; + u64 image_offset; + u64 resid; + u16 opcode; + + dout("%s: img %p bio %p\n", __func__, img_request, bio_list); + + opcode = img_request->write_request ? CEPH_OSD_OP_WRITE + : CEPH_OSD_OP_READ; + bio_offset = 0; + image_offset = img_request->offset; + rbd_assert(image_offset == bio_list->bi_sector << SECTOR_SHIFT); + resid = img_request->length; + rbd_assert(resid > 0); + while (resid) { + const char *object_name; + unsigned int clone_size; + struct ceph_osd_req_op *op; + u64 offset; + u64 length; + + object_name = rbd_segment_name(rbd_dev, image_offset); + if (!object_name) + goto out_unwind; + offset = rbd_segment_offset(rbd_dev, image_offset); + length = rbd_segment_length(rbd_dev, image_offset, resid); + obj_request = rbd_obj_request_create(object_name, + offset, length, + OBJ_REQUEST_BIO); + kfree(object_name); /* object request has its own copy */ + if (!obj_request) + goto out_unwind; + + rbd_assert(length <= (u64) UINT_MAX); + clone_size = (unsigned int) length; + obj_request->bio_list = bio_chain_clone_range(&bio_list, + &bio_offset, clone_size, + GFP_ATOMIC); + if (!obj_request->bio_list) + goto out_partial; - num_pages = calc_pages_for(ofs, inbound_size); - pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL); - if (IS_ERR(pages)) - return PTR_ERR(pages); + /* + * Build up the op to use in building the osd + * request. Note that the contents of the op are + * copied by rbd_osd_req_create(). + */ + op = rbd_osd_req_op_create(opcode, offset, length); + if (!op) + goto out_partial; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, + img_request->write_request, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out_partial; + /* status and version are initially zero-filled */ + + rbd_img_obj_request_add(img_request, obj_request); + + image_offset += length; + resid -= length; + } - ret = rbd_do_request(NULL, rbd_dev, snapc, snapid, - object_name, ofs, inbound_size, NULL, - pages, num_pages, - flags, - ops, - NULL, 0, - NULL, - linger_req, ver); - if (ret < 0) - goto done; + return 0; - if ((flags & CEPH_OSD_FLAG_READ) && inbound) - ret = ceph_copy_from_page_vector(pages, inbound, ofs, ret); +out_partial: + rbd_obj_request_put(obj_request); +out_unwind: + for_each_obj_request_safe(img_request, obj_request, next_obj_request) + rbd_obj_request_put(obj_request); -done: - ceph_release_page_vector(pages, num_pages); - return ret; + return -ENOMEM; } -/* - * Do an asynchronous ceph osd operation - */ -static int rbd_do_op(struct request *rq, - struct rbd_device *rbd_dev, - struct ceph_snap_context *snapc, - u64 ofs, u64 len, - struct bio *bio, - struct rbd_req_coll *coll, - int coll_index) -{ - char *seg_name; - u64 seg_ofs; - u64 seg_len; - int ret; - struct ceph_osd_req_op *ops; - u32 payload_len; - int opcode; - int flags; - u64 snapid; - - seg_name = rbd_segment_name(rbd_dev, ofs); - if (!seg_name) - return -ENOMEM; - seg_len = rbd_segment_length(rbd_dev, ofs, len); - seg_ofs = rbd_segment_offset(rbd_dev, ofs); - - if (rq_data_dir(rq) == WRITE) { - opcode = CEPH_OSD_OP_WRITE; - flags = CEPH_OSD_FLAG_WRITE|CEPH_OSD_FLAG_ONDISK; - snapid = CEPH_NOSNAP; - payload_len = seg_len; - } else { - opcode = CEPH_OSD_OP_READ; - flags = CEPH_OSD_FLAG_READ; - snapc = NULL; - snapid = rbd_dev->spec->snap_id; - payload_len = 0; +static void rbd_img_obj_callback(struct rbd_obj_request *obj_request) +{ + struct rbd_img_request *img_request; + u32 which = obj_request->which; + bool more = true; + + img_request = obj_request->img_request; + + dout("%s: img %p obj %p\n", __func__, img_request, obj_request); + rbd_assert(img_request != NULL); + rbd_assert(img_request->rq != NULL); + rbd_assert(img_request->obj_request_count > 0); + rbd_assert(which != BAD_WHICH); + rbd_assert(which < img_request->obj_request_count); + rbd_assert(which >= img_request->next_completion); + + spin_lock_irq(&img_request->completion_lock); + if (which != img_request->next_completion) + goto out; + + for_each_obj_request_from(img_request, obj_request) { + unsigned int xferred; + int result; + + rbd_assert(more); + rbd_assert(which < img_request->obj_request_count); + + if (!obj_request_done_test(obj_request)) + break; + + rbd_assert(obj_request->xferred <= (u64) UINT_MAX); + xferred = (unsigned int) obj_request->xferred; + result = (int) obj_request->result; + if (result) + rbd_warn(NULL, "obj_request %s result %d xferred %u\n", + img_request->write_request ? "write" : "read", + result, xferred); + + more = blk_end_request(img_request->rq, result, xferred); + which++; } - ret = -ENOMEM; - ops = rbd_create_rw_ops(1, opcode, payload_len); - if (!ops) - goto done; + rbd_assert(more ^ (which == img_request->obj_request_count)); + img_request->next_completion = which; +out: + spin_unlock_irq(&img_request->completion_lock); - /* we've taken care of segment sizes earlier when we - cloned the bios. We should never have a segment - truncated at this point */ - rbd_assert(seg_len == len); - - ret = rbd_do_request(rq, rbd_dev, snapc, snapid, - seg_name, seg_ofs, seg_len, - bio, - NULL, 0, - flags, - ops, - coll, coll_index, - rbd_req_cb, 0, NULL); - - rbd_destroy_ops(ops); -done: - kfree(seg_name); - return ret; + if (!more) + rbd_img_request_complete(img_request); } -/* - * Request sync osd read - */ -static int rbd_req_sync_read(struct rbd_device *rbd_dev, - u64 snapid, - const char *object_name, - u64 ofs, u64 len, - char *buf, - u64 *ver) -{ - struct ceph_osd_req_op *ops; - int ret; +static int rbd_img_request_submit(struct rbd_img_request *img_request) +{ + struct rbd_device *rbd_dev = img_request->rbd_dev; + struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_obj_request *obj_request; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_READ, 0); - if (!ops) - return -ENOMEM; + dout("%s: img %p\n", __func__, img_request); + for_each_obj_request(img_request, obj_request) { + int ret; - ret = rbd_req_sync_op(rbd_dev, NULL, - snapid, - CEPH_OSD_FLAG_READ, - ops, object_name, ofs, len, buf, NULL, ver); - rbd_destroy_ops(ops); + obj_request->callback = rbd_img_obj_callback; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + return ret; + /* + * The image request has its own reference to each + * of its object requests, so we can safely drop the + * initial one here. + */ + rbd_obj_request_put(obj_request); + } - return ret; + return 0; } -/* - * Request sync osd watch - */ -static int rbd_req_sync_notify_ack(struct rbd_device *rbd_dev, - u64 ver, - u64 notify_id) +static int rbd_obj_notify_ack(struct rbd_device *rbd_dev, + u64 ver, u64 notify_id) { - struct ceph_osd_req_op *ops; + struct rbd_obj_request *obj_request; + struct ceph_osd_req_op *op; + struct ceph_osd_client *osdc; int ret; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_NOTIFY_ACK, 0); - if (!ops) + obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + OBJ_REQUEST_NODATA); + if (!obj_request) return -ENOMEM; - ops[0].watch.ver = cpu_to_le64(ver); - ops[0].watch.cookie = notify_id; - ops[0].watch.flag = 0; + ret = -ENOMEM; + op = rbd_osd_req_op_create(CEPH_OSD_OP_NOTIFY_ACK, notify_id, ver); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; - ret = rbd_do_request(NULL, rbd_dev, NULL, CEPH_NOSNAP, - rbd_dev->header_name, 0, 0, NULL, - NULL, 0, - CEPH_OSD_FLAG_READ, - ops, - NULL, 0, - rbd_simple_req_cb, 0, NULL); + osdc = &rbd_dev->rbd_client->client->osdc; + obj_request->callback = rbd_obj_request_put; + ret = rbd_obj_request_submit(osdc, obj_request); +out: + if (ret) + rbd_obj_request_put(obj_request); - rbd_destroy_ops(ops); return ret; } @@ -1381,95 +1791,103 @@ static void rbd_watch_cb(u64 ver, u64 notify_id, u8 opcode, void *data) if (!rbd_dev) return; - dout("rbd_watch_cb %s notify_id=%llu opcode=%u\n", + dout("%s: \"%s\" notify_id %llu opcode %u\n", __func__, rbd_dev->header_name, (unsigned long long) notify_id, (unsigned int) opcode); rc = rbd_dev_refresh(rbd_dev, &hver); if (rc) - pr_warning(RBD_DRV_NAME "%d got notification but failed to " - " update snaps: %d\n", rbd_dev->major, rc); + rbd_warn(rbd_dev, "got notification but failed to " + " update snaps: %d\n", rc); - rbd_req_sync_notify_ack(rbd_dev, hver, notify_id); + rbd_obj_notify_ack(rbd_dev, hver, notify_id); } /* - * Request sync osd watch + * Request sync osd watch/unwatch. The value of "start" determines + * whether a watch request is being initiated or torn down. */ -static int rbd_req_sync_watch(struct rbd_device *rbd_dev) +static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) { - struct ceph_osd_req_op *ops; struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc; + struct rbd_obj_request *obj_request; + struct ceph_osd_req_op *op; int ret; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); - if (!ops) - return -ENOMEM; + rbd_assert(start ^ !!rbd_dev->watch_event); + rbd_assert(start ^ !!rbd_dev->watch_request); - ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, - (void *)rbd_dev, &rbd_dev->watch_event); - if (ret < 0) - goto fail; + if (start) { + ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, + &rbd_dev->watch_event); + if (ret < 0) + return ret; + rbd_assert(rbd_dev->watch_event != NULL); + } - ops[0].watch.ver = cpu_to_le64(rbd_dev->header.obj_version); - ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); - ops[0].watch.flag = 1; + ret = -ENOMEM; + obj_request = rbd_obj_request_create(rbd_dev->header_name, 0, 0, + OBJ_REQUEST_NODATA); + if (!obj_request) + goto out_cancel; + + op = rbd_osd_req_op_create(CEPH_OSD_OP_WATCH, + rbd_dev->watch_event->cookie, + rbd_dev->header.obj_version, start); + if (!op) + goto out_cancel; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out_cancel; + + if (start) + ceph_osdc_set_request_linger(osdc, obj_request->osd_req); + else + ceph_osdc_unregister_linger_request(osdc, + rbd_dev->watch_request->osd_req); + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out_cancel; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out_cancel; + ret = obj_request->result; + if (ret) + goto out_cancel; - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ops, - rbd_dev->header_name, - 0, 0, NULL, - &rbd_dev->watch_request, NULL); + /* + * A watch request is set to linger, so the underlying osd + * request won't go away until we unregister it. We retain + * a pointer to the object request during that time (in + * rbd_dev->watch_request), so we'll keep a reference to + * it. We'll drop that reference (below) after we've + * unregistered it. + */ + if (start) { + rbd_dev->watch_request = obj_request; - if (ret < 0) - goto fail_event; + return 0; + } - rbd_destroy_ops(ops); - return 0; + /* We have successfully torn down the watch request */ -fail_event: + rbd_obj_request_put(rbd_dev->watch_request); + rbd_dev->watch_request = NULL; +out_cancel: + /* Cancel the event if we're tearing down, or on error */ ceph_osdc_cancel_event(rbd_dev->watch_event); rbd_dev->watch_event = NULL; -fail: - rbd_destroy_ops(ops); - return ret; -} + if (obj_request) + rbd_obj_request_put(obj_request); -/* - * Request sync osd unwatch - */ -static int rbd_req_sync_unwatch(struct rbd_device *rbd_dev) -{ - struct ceph_osd_req_op *ops; - int ret; - - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_WATCH, 0); - if (!ops) - return -ENOMEM; - - ops[0].watch.ver = 0; - ops[0].watch.cookie = cpu_to_le64(rbd_dev->watch_event->cookie); - ops[0].watch.flag = 0; - - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK, - ops, - rbd_dev->header_name, - 0, 0, NULL, NULL, NULL); - - - rbd_destroy_ops(ops); - ceph_osdc_cancel_event(rbd_dev->watch_event); - rbd_dev->watch_event = NULL; return ret; } /* * Synchronous osd object method call */ -static int rbd_req_sync_exec(struct rbd_device *rbd_dev, +static int rbd_obj_method_sync(struct rbd_device *rbd_dev, const char *object_name, const char *class_name, const char *method_name, @@ -1477,169 +1895,154 @@ static int rbd_req_sync_exec(struct rbd_device *rbd_dev, size_t outbound_size, char *inbound, size_t inbound_size, - int flags, - u64 *ver) + u64 *version) { - struct ceph_osd_req_op *ops; - int class_name_len = strlen(class_name); - int method_name_len = strlen(method_name); - int payload_size; + struct rbd_obj_request *obj_request; + struct ceph_osd_client *osdc; + struct ceph_osd_req_op *op; + struct page **pages; + u32 page_count; int ret; /* - * Any input parameters required by the method we're calling - * will be sent along with the class and method names as - * part of the message payload. That data and its size are - * supplied via the indata and indata_len fields (named from - * the perspective of the server side) in the OSD request - * operation. + * Method calls are ultimately read operations but they + * don't involve object data (so no offset or length). + * The result should placed into the inbound buffer + * provided. They also supply outbound data--parameters for + * the object method. Currently if this is present it will + * be a snapshot id. */ - payload_size = class_name_len + method_name_len + outbound_size; - ops = rbd_create_rw_ops(1, CEPH_OSD_OP_CALL, payload_size); - if (!ops) - return -ENOMEM; + page_count = (u32) calc_pages_for(0, inbound_size); + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + if (IS_ERR(pages)) + return PTR_ERR(pages); - ops[0].cls.class_name = class_name; - ops[0].cls.class_len = (__u8) class_name_len; - ops[0].cls.method_name = method_name; - ops[0].cls.method_len = (__u8) method_name_len; - ops[0].cls.argc = 0; - ops[0].cls.indata = outbound; - ops[0].cls.indata_len = outbound_size; + ret = -ENOMEM; + obj_request = rbd_obj_request_create(object_name, 0, 0, + OBJ_REQUEST_PAGES); + if (!obj_request) + goto out; - ret = rbd_req_sync_op(rbd_dev, NULL, - CEPH_NOSNAP, - flags, ops, - object_name, 0, inbound_size, inbound, - NULL, ver); + obj_request->pages = pages; + obj_request->page_count = page_count; - rbd_destroy_ops(ops); + op = rbd_osd_req_op_create(CEPH_OSD_OP_CALL, class_name, + method_name, outbound, outbound_size); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; - dout("cls_exec returned %d\n", ret); - return ret; -} + osdc = &rbd_dev->rbd_client->client->osdc; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out; -static struct rbd_req_coll *rbd_alloc_coll(int num_reqs) -{ - struct rbd_req_coll *coll = - kzalloc(sizeof(struct rbd_req_coll) + - sizeof(struct rbd_req_status) * num_reqs, - GFP_ATOMIC); + ret = obj_request->result; + if (ret < 0) + goto out; + ret = 0; + ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred); + if (version) + *version = obj_request->version; +out: + if (obj_request) + rbd_obj_request_put(obj_request); + else + ceph_release_page_vector(pages, page_count); - if (!coll) - return NULL; - coll->total = num_reqs; - kref_init(&coll->kref); - return coll; + return ret; } -/* - * block device queue callback - */ -static void rbd_rq_fn(struct request_queue *q) +static void rbd_request_fn(struct request_queue *q) + __releases(q->queue_lock) __acquires(q->queue_lock) { struct rbd_device *rbd_dev = q->queuedata; + bool read_only = rbd_dev->mapping.read_only; struct request *rq; + int result; while ((rq = blk_fetch_request(q))) { - struct bio *bio; - bool do_write; - unsigned int size; - u64 ofs; - int num_segs, cur_seg = 0; - struct rbd_req_coll *coll; - struct ceph_snap_context *snapc; - unsigned int bio_offset; - - dout("fetched request\n"); - - /* filter out block requests we don't understand */ - if ((rq->cmd_type != REQ_TYPE_FS)) { - __blk_end_request_all(rq, 0); - continue; - } + bool write_request = rq_data_dir(rq) == WRITE; + struct rbd_img_request *img_request; + u64 offset; + u64 length; + + /* Ignore any non-FS requests that filter through. */ - /* deduce our operation (read, write) */ - do_write = (rq_data_dir(rq) == WRITE); - if (do_write && rbd_dev->mapping.read_only) { - __blk_end_request_all(rq, -EROFS); + if (rq->cmd_type != REQ_TYPE_FS) { + dout("%s: non-fs request type %d\n", __func__, + (int) rq->cmd_type); + __blk_end_request_all(rq, 0); continue; } - spin_unlock_irq(q->queue_lock); + /* Ignore/skip any zero-length requests */ - down_read(&rbd_dev->header_rwsem); + offset = (u64) blk_rq_pos(rq) << SECTOR_SHIFT; + length = (u64) blk_rq_bytes(rq); - if (!rbd_dev->exists) { - rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); - up_read(&rbd_dev->header_rwsem); - dout("request for non-existent snapshot"); - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENXIO); + if (!length) { + dout("%s: zero-length request\n", __func__); + __blk_end_request_all(rq, 0); continue; } - snapc = ceph_get_snap_context(rbd_dev->header.snapc); - - up_read(&rbd_dev->header_rwsem); - - size = blk_rq_bytes(rq); - ofs = blk_rq_pos(rq) * SECTOR_SIZE; - bio = rq->bio; + spin_unlock_irq(q->queue_lock); - dout("%s 0x%x bytes at 0x%llx\n", - do_write ? "write" : "read", - size, (unsigned long long) blk_rq_pos(rq) * SECTOR_SIZE); + /* Disallow writes to a read-only device */ - num_segs = rbd_get_num_segments(&rbd_dev->header, ofs, size); - if (num_segs <= 0) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, num_segs); - ceph_put_snap_context(snapc); - continue; + if (write_request) { + result = -EROFS; + if (read_only) + goto end_request; + rbd_assert(rbd_dev->spec->snap_id == CEPH_NOSNAP); } - coll = rbd_alloc_coll(num_segs); - if (!coll) { - spin_lock_irq(q->queue_lock); - __blk_end_request_all(rq, -ENOMEM); - ceph_put_snap_context(snapc); - continue; - } - - bio_offset = 0; - do { - u64 limit = rbd_segment_length(rbd_dev, ofs, size); - unsigned int chain_size; - struct bio *bio_chain; - - BUG_ON(limit > (u64) UINT_MAX); - chain_size = (unsigned int) limit; - dout("rq->bio->bi_vcnt=%hu\n", rq->bio->bi_vcnt); - kref_get(&coll->kref); + /* + * Quit early if the mapped snapshot no longer + * exists. It's still possible the snapshot will + * have disappeared by the time our request arrives + * at the osd, but there's no sense in sending it if + * we already know. + */ + if (!test_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags)) { + dout("request for non-existent snapshot"); + rbd_assert(rbd_dev->spec->snap_id != CEPH_NOSNAP); + result = -ENXIO; + goto end_request; + } - /* Pass a cloned bio chain via an osd request */ + result = -EINVAL; + if (WARN_ON(offset && length > U64_MAX - offset + 1)) + goto end_request; /* Shouldn't happen */ - bio_chain = bio_chain_clone_range(&bio, - &bio_offset, chain_size, - GFP_ATOMIC); - if (bio_chain) - (void) rbd_do_op(rq, rbd_dev, snapc, - ofs, chain_size, - bio_chain, coll, cur_seg); - else - rbd_coll_end_req_index(rq, coll, cur_seg, - -ENOMEM, chain_size); - size -= chain_size; - ofs += chain_size; + result = -ENOMEM; + img_request = rbd_img_request_create(rbd_dev, offset, length, + write_request); + if (!img_request) + goto end_request; - cur_seg++; - } while (size > 0); - kref_put(&coll->kref, rbd_coll_release); + img_request->rq = rq; + result = rbd_img_request_fill_bio(img_request, rq->bio); + if (!result) + result = rbd_img_request_submit(img_request); + if (result) + rbd_img_request_put(img_request); +end_request: spin_lock_irq(q->queue_lock); - - ceph_put_snap_context(snapc); + if (result < 0) { + rbd_warn(rbd_dev, "obj_request %s result %d\n", + write_request ? "write" : "read", result); + __blk_end_request_all(rq, result); + } } } @@ -1703,6 +2106,71 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) put_disk(disk); } +static int rbd_obj_read_sync(struct rbd_device *rbd_dev, + const char *object_name, + u64 offset, u64 length, + char *buf, u64 *version) + +{ + struct ceph_osd_req_op *op; + struct rbd_obj_request *obj_request; + struct ceph_osd_client *osdc; + struct page **pages = NULL; + u32 page_count; + size_t size; + int ret; + + page_count = (u32) calc_pages_for(offset, length); + pages = ceph_alloc_page_vector(page_count, GFP_KERNEL); + if (IS_ERR(pages)) + ret = PTR_ERR(pages); + + ret = -ENOMEM; + obj_request = rbd_obj_request_create(object_name, offset, length, + OBJ_REQUEST_PAGES); + if (!obj_request) + goto out; + + obj_request->pages = pages; + obj_request->page_count = page_count; + + op = rbd_osd_req_op_create(CEPH_OSD_OP_READ, offset, length); + if (!op) + goto out; + obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, + obj_request, op); + rbd_osd_req_op_destroy(op); + if (!obj_request->osd_req) + goto out; + + osdc = &rbd_dev->rbd_client->client->osdc; + ret = rbd_obj_request_submit(osdc, obj_request); + if (ret) + goto out; + ret = rbd_obj_request_wait(obj_request); + if (ret) + goto out; + + ret = obj_request->result; + if (ret < 0) + goto out; + + rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); + size = (size_t) obj_request->xferred; + ceph_copy_from_page_vector(pages, buf, 0, size); + rbd_assert(size <= (size_t) INT_MAX); + ret = (int) size; + if (version) + *version = obj_request->version; +out: + if (obj_request) + rbd_obj_request_put(obj_request); + else + ceph_release_page_vector(pages, page_count); + + return ret; +} + /* * Read the complete header for the given rbd device. * @@ -1741,24 +2209,20 @@ rbd_dev_v1_header_read(struct rbd_device *rbd_dev, u64 *version) if (!ondisk) return ERR_PTR(-ENOMEM); - ret = rbd_req_sync_read(rbd_dev, CEPH_NOSNAP, - rbd_dev->header_name, + ret = rbd_obj_read_sync(rbd_dev, rbd_dev->header_name, 0, size, (char *) ondisk, version); - if (ret < 0) goto out_err; if (WARN_ON((size_t) ret < size)) { ret = -ENXIO; - pr_warning("short header read for image %s" - " (want %zd got %d)\n", - rbd_dev->spec->image_name, size, ret); + rbd_warn(rbd_dev, "short header read (want %zd got %d)", + size, ret); goto out_err; } if (!rbd_dev_ondisk_valid(ondisk)) { ret = -ENXIO; - pr_warning("invalid header for image %s\n", - rbd_dev->spec->image_name); + rbd_warn(rbd_dev, "invalid header"); goto out_err; } @@ -1895,8 +2359,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->fops = &rbd_bd_ops; disk->private_data = rbd_dev; - /* init rq */ - q = blk_init_queue(rbd_rq_fn, &rbd_dev->lock); + q = blk_init_queue(rbd_request_fn, &rbd_dev->lock); if (!q) goto out_disk; @@ -2233,7 +2696,7 @@ static void rbd_spec_free(struct kref *kref) kfree(spec); } -struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, +static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, struct rbd_spec *spec) { struct rbd_device *rbd_dev; @@ -2243,6 +2706,7 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, return NULL; spin_lock_init(&rbd_dev->lock); + rbd_dev->flags = 0; INIT_LIST_HEAD(&rbd_dev->node); INIT_LIST_HEAD(&rbd_dev->snaps); init_rwsem(&rbd_dev->header_rwsem); @@ -2250,6 +2714,13 @@ struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, rbd_dev->spec = spec; rbd_dev->rbd_client = rbdc; + /* Initialize the layout used for all rbd requests */ + + rbd_dev->layout.fl_stripe_unit = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + rbd_dev->layout.fl_stripe_count = cpu_to_le32(1); + rbd_dev->layout.fl_object_size = cpu_to_le32(1 << RBD_MAX_OBJ_ORDER); + rbd_dev->layout.fl_pg_pool = cpu_to_le32((u32) spec->pool_id); + return rbd_dev; } @@ -2360,12 +2831,11 @@ static int _rbd_dev_v2_snap_size(struct rbd_device *rbd_dev, u64 snap_id, __le64 size; } __attribute__ ((packed)) size_buf = { 0 }; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_size", (char *) &snapid, sizeof (snapid), - (char *) &size_buf, sizeof (size_buf), - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + (char *) &size_buf, sizeof (size_buf), NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -2396,15 +2866,13 @@ static int rbd_dev_v2_object_prefix(struct rbd_device *rbd_dev) if (!reply_buf) return -ENOMEM; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_object_prefix", NULL, 0, - reply_buf, RBD_OBJ_PREFIX_LEN_MAX, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, RBD_OBJ_PREFIX_LEN_MAX, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; - ret = 0; /* rbd_req_sync_exec() can return positive */ p = reply_buf; rbd_dev->header.object_prefix = ceph_extract_encoded_string(&p, @@ -2435,12 +2903,12 @@ static int _rbd_dev_v2_snap_features(struct rbd_device *rbd_dev, u64 snap_id, u64 incompat; int ret; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_features", (char *) &snapid, sizeof (snapid), (char *) &features_buf, sizeof (features_buf), - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) return ret; @@ -2474,7 +2942,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) void *end; char *image_id; u64 overlap; - size_t len = 0; int ret; parent_spec = rbd_spec_alloc(); @@ -2492,12 +2959,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) } snapid = cpu_to_le64(CEPH_NOSNAP); - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_parent", (char *) &snapid, sizeof (snapid), - (char *) reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + (char *) reply_buf, size, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out_err; @@ -2508,13 +2974,18 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) if (parent_spec->pool_id == CEPH_NOPOOL) goto out; /* No parent? No problem. */ - image_id = ceph_extract_encoded_string(&p, end, &len, GFP_KERNEL); + /* The ceph file layout needs to fit pool id in 32 bits */ + + ret = -EIO; + if (WARN_ON(parent_spec->pool_id > (u64) U32_MAX)) + goto out; + + image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL); if (IS_ERR(image_id)) { ret = PTR_ERR(image_id); goto out_err; } parent_spec->image_id = image_id; - parent_spec->image_id_len = len; ceph_decode_64_safe(&p, end, parent_spec->snap_id, out_err); ceph_decode_64_safe(&p, end, overlap, out_err); @@ -2544,26 +3015,25 @@ static char *rbd_dev_image_name(struct rbd_device *rbd_dev) rbd_assert(!rbd_dev->spec->image_name); - image_id_size = sizeof (__le32) + rbd_dev->spec->image_id_len; + len = strlen(rbd_dev->spec->image_id); + image_id_size = sizeof (__le32) + len; image_id = kmalloc(image_id_size, GFP_KERNEL); if (!image_id) return NULL; p = image_id; end = (char *) image_id + image_id_size; - ceph_encode_string(&p, end, rbd_dev->spec->image_id, - (u32) rbd_dev->spec->image_id_len); + ceph_encode_string(&p, end, rbd_dev->spec->image_id, (u32) len); size = sizeof (__le32) + RBD_IMAGE_NAME_LEN_MAX; reply_buf = kmalloc(size, GFP_KERNEL); if (!reply_buf) goto out; - ret = rbd_req_sync_exec(rbd_dev, RBD_DIRECTORY, + ret = rbd_obj_method_sync(rbd_dev, RBD_DIRECTORY, "rbd", "dir_get_name", image_id, image_id_size, - (char *) reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); + (char *) reply_buf, size, NULL); if (ret < 0) goto out; p = reply_buf; @@ -2602,8 +3072,11 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) osdc = &rbd_dev->rbd_client->client->osdc; name = ceph_pg_pool_name_by_id(osdc->osdmap, rbd_dev->spec->pool_id); - if (!name) - return -EIO; /* pool id too large (>= 2^31) */ + if (!name) { + rbd_warn(rbd_dev, "there is no pool with id %llu", + rbd_dev->spec->pool_id); /* Really a BUG() */ + return -EIO; + } rbd_dev->spec->pool_name = kstrdup(name, GFP_KERNEL); if (!rbd_dev->spec->pool_name) @@ -2612,19 +3085,17 @@ static int rbd_dev_probe_update_spec(struct rbd_device *rbd_dev) /* Fetch the image name; tolerate failure here */ name = rbd_dev_image_name(rbd_dev); - if (name) { - rbd_dev->spec->image_name_len = strlen(name); + if (name) rbd_dev->spec->image_name = (char *) name; - } else { - pr_warning(RBD_DRV_NAME "%d " - "unable to get image name for image id %s\n", - rbd_dev->major, rbd_dev->spec->image_id); - } + else + rbd_warn(rbd_dev, "unable to get image name"); /* Look up the snapshot name. */ name = rbd_snap_name(rbd_dev, rbd_dev->spec->snap_id); if (!name) { + rbd_warn(rbd_dev, "no snapshot with id %llu", + rbd_dev->spec->snap_id); /* Really a BUG() */ ret = -EIO; goto out_err; } @@ -2665,12 +3136,11 @@ static int rbd_dev_v2_snap_context(struct rbd_device *rbd_dev, u64 *ver) if (!reply_buf) return -ENOMEM; - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_snapcontext", NULL, 0, - reply_buf, size, - CEPH_OSD_FLAG_READ, ver); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, size, ver); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -2735,12 +3205,11 @@ static char *rbd_dev_v2_snap_name(struct rbd_device *rbd_dev, u32 which) return ERR_PTR(-ENOMEM); snap_id = cpu_to_le64(rbd_dev->header.snapc->snaps[which]); - ret = rbd_req_sync_exec(rbd_dev, rbd_dev->header_name, + ret = rbd_obj_method_sync(rbd_dev, rbd_dev->header_name, "rbd", "get_snapshot_name", (char *) &snap_id, sizeof (snap_id), - reply_buf, size, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + reply_buf, size, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; @@ -2766,7 +3235,7 @@ out: static char *rbd_dev_v2_snap_info(struct rbd_device *rbd_dev, u32 which, u64 *snap_size, u64 *snap_features) { - __le64 snap_id; + u64 snap_id; u8 order; int ret; @@ -2865,10 +3334,17 @@ static int rbd_dev_snaps_update(struct rbd_device *rbd_dev) if (snap_id == CEPH_NOSNAP || (snap && snap->id > snap_id)) { struct list_head *next = links->next; - /* Existing snapshot not in the new snap context */ - + /* + * A previously-existing snapshot is not in + * the new snap context. + * + * If the now missing snapshot is the one the + * image is mapped to, clear its exists flag + * so we can avoid sending any more requests + * to it. + */ if (rbd_dev->spec->snap_id == snap->id) - rbd_dev->exists = false; + clear_bit(RBD_DEV_FLAG_EXISTS, &rbd_dev->flags); rbd_remove_snap_dev(snap); dout("%ssnap id %llu has been removed\n", rbd_dev->spec->snap_id == snap->id ? @@ -2942,7 +3418,7 @@ static int rbd_dev_snaps_register(struct rbd_device *rbd_dev) struct rbd_snap *snap; int ret = 0; - dout("%s called\n", __func__); + dout("%s:\n", __func__); if (WARN_ON(!device_is_registered(&rbd_dev->dev))) return -EIO; @@ -2983,22 +3459,6 @@ static void rbd_bus_del_dev(struct rbd_device *rbd_dev) device_unregister(&rbd_dev->dev); } -static int rbd_init_watch_dev(struct rbd_device *rbd_dev) -{ - int ret, rc; - - do { - ret = rbd_req_sync_watch(rbd_dev); - if (ret == -ERANGE) { - rc = rbd_dev_refresh(rbd_dev, NULL); - if (rc < 0) - return rc; - } - } while (ret == -ERANGE); - - return ret; -} - static atomic64_t rbd_dev_id_max = ATOMIC64_INIT(0); /* @@ -3138,11 +3598,9 @@ static inline char *dup_token(const char **buf, size_t *lenp) size_t len; len = next_token(buf); - dup = kmalloc(len + 1, GFP_KERNEL); + dup = kmemdup(*buf, len + 1, GFP_KERNEL); if (!dup) return NULL; - - memcpy(dup, *buf, len); *(dup + len) = '\0'; *buf += len; @@ -3210,8 +3668,10 @@ static int rbd_add_parse_args(const char *buf, /* The first four tokens are required */ len = next_token(&buf); - if (!len) - return -EINVAL; /* Missing monitor address(es) */ + if (!len) { + rbd_warn(NULL, "no monitor address(es) provided"); + return -EINVAL; + } mon_addrs = buf; mon_addrs_size = len + 1; buf += len; @@ -3220,8 +3680,10 @@ static int rbd_add_parse_args(const char *buf, options = dup_token(&buf, NULL); if (!options) return -ENOMEM; - if (!*options) - goto out_err; /* Missing options */ + if (!*options) { + rbd_warn(NULL, "no options provided"); + goto out_err; + } spec = rbd_spec_alloc(); if (!spec) @@ -3230,14 +3692,18 @@ static int rbd_add_parse_args(const char *buf, spec->pool_name = dup_token(&buf, NULL); if (!spec->pool_name) goto out_mem; - if (!*spec->pool_name) - goto out_err; /* Missing pool name */ + if (!*spec->pool_name) { + rbd_warn(NULL, "no pool name provided"); + goto out_err; + } - spec->image_name = dup_token(&buf, &spec->image_name_len); + spec->image_name = dup_token(&buf, NULL); if (!spec->image_name) goto out_mem; - if (!*spec->image_name) - goto out_err; /* Missing image name */ + if (!*spec->image_name) { + rbd_warn(NULL, "no image name provided"); + goto out_err; + } /* * Snapshot name is optional; default is to use "-" @@ -3251,10 +3717,9 @@ static int rbd_add_parse_args(const char *buf, ret = -ENAMETOOLONG; goto out_err; } - spec->snap_name = kmalloc(len + 1, GFP_KERNEL); + spec->snap_name = kmemdup(buf, len + 1, GFP_KERNEL); if (!spec->snap_name) goto out_mem; - memcpy(spec->snap_name, buf, len); *(spec->snap_name + len) = '\0'; /* Initialize all rbd options to the defaults */ @@ -3323,7 +3788,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) * First, see if the format 2 image id file exists, and if * so, get the image's persistent id from it. */ - size = sizeof (RBD_ID_PREFIX) + rbd_dev->spec->image_name_len; + size = sizeof (RBD_ID_PREFIX) + strlen(rbd_dev->spec->image_name); object_name = kmalloc(size, GFP_NOIO); if (!object_name) return -ENOMEM; @@ -3339,21 +3804,18 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev) goto out; } - ret = rbd_req_sync_exec(rbd_dev, object_name, + ret = rbd_obj_method_sync(rbd_dev, object_name, "rbd", "get_id", NULL, 0, - response, RBD_IMAGE_ID_LEN_MAX, - CEPH_OSD_FLAG_READ, NULL); - dout("%s: rbd_req_sync_exec returned %d\n", __func__, ret); + response, RBD_IMAGE_ID_LEN_MAX, NULL); + dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret); if (ret < 0) goto out; - ret = 0; /* rbd_req_sync_exec() can return positive */ p = response; rbd_dev->spec->image_id = ceph_extract_encoded_string(&p, p + RBD_IMAGE_ID_LEN_MAX, - &rbd_dev->spec->image_id_len, - GFP_NOIO); + NULL, GFP_NOIO); if (IS_ERR(rbd_dev->spec->image_id)) { ret = PTR_ERR(rbd_dev->spec->image_id); rbd_dev->spec->image_id = NULL; @@ -3377,11 +3839,10 @@ static int rbd_dev_v1_probe(struct rbd_device *rbd_dev) rbd_dev->spec->image_id = kstrdup("", GFP_KERNEL); if (!rbd_dev->spec->image_id) return -ENOMEM; - rbd_dev->spec->image_id_len = 0; /* Record the header object name for this rbd image. */ - size = rbd_dev->spec->image_name_len + sizeof (RBD_SUFFIX); + size = strlen(rbd_dev->spec->image_name) + sizeof (RBD_SUFFIX); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) { ret = -ENOMEM; @@ -3427,7 +3888,7 @@ static int rbd_dev_v2_probe(struct rbd_device *rbd_dev) * Image id was filled in by the caller. Record the header * object name for this rbd image. */ - size = sizeof (RBD_HEADER_PREFIX) + rbd_dev->spec->image_id_len; + size = sizeof (RBD_HEADER_PREFIX) + strlen(rbd_dev->spec->image_id); rbd_dev->header_name = kmalloc(size, GFP_KERNEL); if (!rbd_dev->header_name) return -ENOMEM; @@ -3542,7 +4003,7 @@ static int rbd_dev_probe_finish(struct rbd_device *rbd_dev) if (ret) goto err_out_bus; - ret = rbd_init_watch_dev(rbd_dev); + ret = rbd_dev_header_watch_sync(rbd_dev, 1); if (ret) goto err_out_bus; @@ -3638,6 +4099,13 @@ static ssize_t rbd_add(struct bus_type *bus, goto err_out_client; spec->pool_id = (u64) rc; + /* The ceph file layout needs to fit pool id in 32 bits */ + + if (WARN_ON(spec->pool_id > (u64) U32_MAX)) { + rc = -EIO; + goto err_out_client; + } + rbd_dev = rbd_dev_create(rbdc, spec); if (!rbd_dev) goto err_out_client; @@ -3691,15 +4159,8 @@ static void rbd_dev_release(struct device *dev) { struct rbd_device *rbd_dev = dev_to_rbd_dev(dev); - if (rbd_dev->watch_request) { - struct ceph_client *client = rbd_dev->rbd_client->client; - - ceph_osdc_unregister_linger_request(&client->osdc, - rbd_dev->watch_request); - } if (rbd_dev->watch_event) - rbd_req_sync_unwatch(rbd_dev); - + rbd_dev_header_watch_sync(rbd_dev, 0); /* clean up and free blkdev */ rbd_free_disk(rbd_dev); @@ -3743,10 +4204,14 @@ static ssize_t rbd_remove(struct bus_type *bus, goto done; } - if (rbd_dev->open_count) { + spin_lock_irq(&rbd_dev->lock); + if (rbd_dev->open_count) ret = -EBUSY; + else + set_bit(RBD_DEV_FLAG_REMOVING, &rbd_dev->flags); + spin_unlock_irq(&rbd_dev->lock); + if (ret < 0) goto done; - } rbd_remove_all_snaps(rbd_dev); rbd_bus_del_dev(rbd_dev); @@ -3782,10 +4247,15 @@ static void rbd_sysfs_cleanup(void) device_unregister(&rbd_root_dev); } -int __init rbd_init(void) +static int __init rbd_init(void) { int rc; + if (!libceph_compatible(NULL)) { + rbd_warn(NULL, "libceph incompatibility (quitting)"); + + return -EINVAL; + } rc = rbd_sysfs_init(); if (rc) return rc; @@ -3793,7 +4263,7 @@ int __init rbd_init(void) return 0; } -void __exit rbd_exit(void) +static void __exit rbd_exit(void) { rbd_sysfs_cleanup(); } diff --git a/drivers/block/rsxx/Makefile b/drivers/block/rsxx/Makefile new file mode 100644 index 00000000000..f35cd0b71f7 --- /dev/null +++ b/drivers/block/rsxx/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_BLK_DEV_RSXX) += rsxx.o +rsxx-y := config.o core.o cregs.o dev.o dma.o diff --git a/drivers/block/rsxx/config.c b/drivers/block/rsxx/config.c new file mode 100644 index 00000000000..a295e7e9ee4 --- /dev/null +++ b/drivers/block/rsxx/config.c @@ -0,0 +1,213 @@ +/* +* Filename: config.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/types.h> +#include <linux/crc32.h> +#include <linux/swab.h> + +#include "rsxx_priv.h" +#include "rsxx_cfg.h" + +static void initialize_config(void *config) +{ + struct rsxx_card_cfg *cfg = config; + + cfg->hdr.version = RSXX_CFG_VERSION; + + cfg->data.block_size = RSXX_HW_BLK_SIZE; + cfg->data.stripe_size = RSXX_HW_BLK_SIZE; + cfg->data.vendor_id = RSXX_VENDOR_ID_TMS_IBM; + cfg->data.cache_order = (-1); + cfg->data.intr_coal.mode = RSXX_INTR_COAL_DISABLED; + cfg->data.intr_coal.count = 0; + cfg->data.intr_coal.latency = 0; +} + +static u32 config_data_crc32(struct rsxx_card_cfg *cfg) +{ + /* + * Return the compliment of the CRC to ensure compatibility + * (i.e. this is how early rsxx drivers did it.) + */ + + return ~crc32(~0, &cfg->data, sizeof(cfg->data)); +} + + +/*----------------- Config Byte Swap Functions -------------------*/ +static void config_hdr_be_to_cpu(struct card_cfg_hdr *hdr) +{ + hdr->version = be32_to_cpu((__force __be32) hdr->version); + hdr->crc = be32_to_cpu((__force __be32) hdr->crc); +} + +static void config_hdr_cpu_to_be(struct card_cfg_hdr *hdr) +{ + hdr->version = (__force u32) cpu_to_be32(hdr->version); + hdr->crc = (__force u32) cpu_to_be32(hdr->crc); +} + +static void config_data_swab(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = swab32(data[i]); +} + +static void config_data_le_to_cpu(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = le32_to_cpu((__force __le32) data[i]); +} + +static void config_data_cpu_to_le(struct rsxx_card_cfg *cfg) +{ + u32 *data = (u32 *) &cfg->data; + int i; + + for (i = 0; i < (sizeof(cfg->data) / 4); i++) + data[i] = (__force u32) cpu_to_le32(data[i]); +} + + +/*----------------- Config Operations ------------------*/ +static int rsxx_save_config(struct rsxx_cardinfo *card) +{ + struct rsxx_card_cfg cfg; + int st; + + memcpy(&cfg, &card->config, sizeof(cfg)); + + if (unlikely(cfg.hdr.version != RSXX_CFG_VERSION)) { + dev_err(CARD_TO_DEV(card), + "Cannot save config with invalid version %d\n", + cfg.hdr.version); + return -EINVAL; + } + + /* Convert data to little endian for the CRC calculation. */ + config_data_cpu_to_le(&cfg); + + cfg.hdr.crc = config_data_crc32(&cfg); + + /* + * Swap the data from little endian to big endian so it can be + * stored. + */ + config_data_swab(&cfg); + config_hdr_cpu_to_be(&cfg.hdr); + + st = rsxx_creg_write(card, CREG_ADD_CONFIG, sizeof(cfg), &cfg, 1); + if (st) + return st; + + return 0; +} + +int rsxx_load_config(struct rsxx_cardinfo *card) +{ + int st; + u32 crc; + + st = rsxx_creg_read(card, CREG_ADD_CONFIG, sizeof(card->config), + &card->config, 1); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed reading card config.\n"); + return st; + } + + config_hdr_be_to_cpu(&card->config.hdr); + + if (card->config.hdr.version == RSXX_CFG_VERSION) { + /* + * We calculate the CRC with the data in little endian, because + * early drivers did not take big endian CPUs into account. + * The data is always stored in big endian, so we need to byte + * swap it before calculating the CRC. + */ + + config_data_swab(&card->config); + + /* Check the CRC */ + crc = config_data_crc32(&card->config); + if (crc != card->config.hdr.crc) { + dev_err(CARD_TO_DEV(card), + "Config corruption detected!\n"); + dev_info(CARD_TO_DEV(card), + "CRC (sb x%08x is x%08x)\n", + card->config.hdr.crc, crc); + return -EIO; + } + + /* Convert the data to CPU byteorder */ + config_data_le_to_cpu(&card->config); + + } else if (card->config.hdr.version != 0) { + dev_err(CARD_TO_DEV(card), + "Invalid config version %d.\n", + card->config.hdr.version); + /* + * Config version changes require special handling from the + * user + */ + return -EINVAL; + } else { + dev_info(CARD_TO_DEV(card), + "Initializing card configuration.\n"); + initialize_config(card); + st = rsxx_save_config(card); + if (st) + return st; + } + + card->config_valid = 1; + + dev_dbg(CARD_TO_DEV(card), "version: x%08x\n", + card->config.hdr.version); + dev_dbg(CARD_TO_DEV(card), "crc: x%08x\n", + card->config.hdr.crc); + dev_dbg(CARD_TO_DEV(card), "block_size: x%08x\n", + card->config.data.block_size); + dev_dbg(CARD_TO_DEV(card), "stripe_size: x%08x\n", + card->config.data.stripe_size); + dev_dbg(CARD_TO_DEV(card), "vendor_id: x%08x\n", + card->config.data.vendor_id); + dev_dbg(CARD_TO_DEV(card), "cache_order: x%08x\n", + card->config.data.cache_order); + dev_dbg(CARD_TO_DEV(card), "mode: x%08x\n", + card->config.data.intr_coal.mode); + dev_dbg(CARD_TO_DEV(card), "count: x%08x\n", + card->config.data.intr_coal.count); + dev_dbg(CARD_TO_DEV(card), "latency: x%08x\n", + card->config.data.intr_coal.latency); + + return 0; +} + diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c new file mode 100644 index 00000000000..e5162487686 --- /dev/null +++ b/drivers/block/rsxx/core.c @@ -0,0 +1,649 @@ +/* +* Filename: core.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/reboot.h> +#include <linux/slab.h> +#include <linux/bitops.h> + +#include <linux/genhd.h> +#include <linux/idr.h> + +#include "rsxx_priv.h" +#include "rsxx_cfg.h" + +#define NO_LEGACY 0 + +MODULE_DESCRIPTION("IBM RamSan PCIe Flash SSD Device Driver"); +MODULE_AUTHOR("IBM <support@ramsan.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRIVER_VERSION); + +static unsigned int force_legacy = NO_LEGACY; +module_param(force_legacy, uint, 0444); +MODULE_PARM_DESC(force_legacy, "Force the use of legacy type PCI interrupts"); + +static DEFINE_IDA(rsxx_disk_ida); +static DEFINE_SPINLOCK(rsxx_ida_lock); + +/*----------------- Interrupt Control & Handling -------------------*/ +static void __enable_intr(unsigned int *mask, unsigned int intr) +{ + *mask |= intr; +} + +static void __disable_intr(unsigned int *mask, unsigned int intr) +{ + *mask &= ~intr; +} + +/* + * NOTE: Disabling the IER will disable the hardware interrupt. + * Disabling the ISR will disable the software handling of the ISR bit. + * + * Enable/Disable interrupt functions assume the card->irq_lock + * is held by the caller. + */ +void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr) +{ + if (unlikely(card->halt)) + return; + + __enable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr) +{ + __disable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr) +{ + if (unlikely(card->halt)) + return; + + __enable_intr(&card->isr_mask, intr); + __enable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} +void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr) +{ + __disable_intr(&card->isr_mask, intr); + __disable_intr(&card->ier_mask, intr); + iowrite32(card->ier_mask, card->regmap + IER); +} + +static irqreturn_t rsxx_isr(int irq, void *pdata) +{ + struct rsxx_cardinfo *card = pdata; + unsigned int isr; + int handled = 0; + int reread_isr; + int i; + + spin_lock(&card->irq_lock); + + do { + reread_isr = 0; + + isr = ioread32(card->regmap + ISR); + if (isr == 0xffffffff) { + /* + * A few systems seem to have an intermittent issue + * where PCI reads return all Fs, but retrying the read + * a little later will return as expected. + */ + dev_info(CARD_TO_DEV(card), + "ISR = 0xFFFFFFFF, retrying later\n"); + break; + } + + isr &= card->isr_mask; + if (!isr) + break; + + for (i = 0; i < card->n_targets; i++) { + if (isr & CR_INTR_DMA(i)) { + if (card->ier_mask & CR_INTR_DMA(i)) { + rsxx_disable_ier(card, CR_INTR_DMA(i)); + reread_isr = 1; + } + queue_work(card->ctrl[i].done_wq, + &card->ctrl[i].dma_done_work); + handled++; + } + } + + if (isr & CR_INTR_CREG) { + schedule_work(&card->creg_ctrl.done_work); + handled++; + } + + if (isr & CR_INTR_EVENT) { + schedule_work(&card->event_work); + rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); + handled++; + } + } while (reread_isr); + + spin_unlock(&card->irq_lock); + + return handled ? IRQ_HANDLED : IRQ_NONE; +} + +/*----------------- Card Event Handler -------------------*/ +static char *rsxx_card_state_to_str(unsigned int state) +{ + static char *state_strings[] = { + "Unknown", "Shutdown", "Starting", "Formatting", + "Uninitialized", "Good", "Shutting Down", + "Fault", "Read Only Fault", "dStroying" + }; + + return state_strings[ffs(state)]; +} + +static void card_state_change(struct rsxx_cardinfo *card, + unsigned int new_state) +{ + int st; + + dev_info(CARD_TO_DEV(card), + "card state change detected.(%s -> %s)\n", + rsxx_card_state_to_str(card->state), + rsxx_card_state_to_str(new_state)); + + card->state = new_state; + + /* Don't attach DMA interfaces if the card has an invalid config */ + if (!card->config_valid) + return; + + switch (new_state) { + case CARD_STATE_RD_ONLY_FAULT: + dev_crit(CARD_TO_DEV(card), + "Hardware has entered read-only mode!\n"); + /* + * Fall through so the DMA devices can be attached and + * the user can attempt to pull off their data. + */ + case CARD_STATE_GOOD: + st = rsxx_get_card_size8(card, &card->size8); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed attaching DMA devices\n"); + + if (card->config_valid) + set_capacity(card->gendisk, card->size8 >> 9); + break; + + case CARD_STATE_FAULT: + dev_crit(CARD_TO_DEV(card), + "Hardware Fault reported!\n"); + /* Fall through. */ + + /* Everything else, detach DMA interface if it's attached. */ + case CARD_STATE_SHUTDOWN: + case CARD_STATE_STARTING: + case CARD_STATE_FORMATTING: + case CARD_STATE_UNINITIALIZED: + case CARD_STATE_SHUTTING_DOWN: + /* + * dStroy is a term coined by marketing to represent the low level + * secure erase. + */ + case CARD_STATE_DSTROYING: + set_capacity(card->gendisk, 0); + break; + } +} + +static void card_event_handler(struct work_struct *work) +{ + struct rsxx_cardinfo *card; + unsigned int state; + unsigned long flags; + int st; + + card = container_of(work, struct rsxx_cardinfo, event_work); + + if (unlikely(card->halt)) + return; + + /* + * Enable the interrupt now to avoid any weird race conditions where a + * state change might occur while rsxx_get_card_state() is + * processing a returned creg cmd. + */ + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + st = rsxx_get_card_state(card, &state); + if (st) { + dev_info(CARD_TO_DEV(card), + "Failed reading state after event.\n"); + return; + } + + if (card->state != state) + card_state_change(card, state); + + if (card->creg_ctrl.creg_stats.stat & CREG_STAT_LOG_PENDING) + rsxx_read_hw_log(card); +} + +/*----------------- Card Operations -------------------*/ +static int card_shutdown(struct rsxx_cardinfo *card) +{ + unsigned int state; + signed long start; + const int timeout = msecs_to_jiffies(120000); + int st; + + /* We can't issue a shutdown if the card is in a transition state */ + start = jiffies; + do { + st = rsxx_get_card_state(card, &state); + if (st) + return st; + } while (state == CARD_STATE_STARTING && + (jiffies - start < timeout)); + + if (state == CARD_STATE_STARTING) + return -ETIMEDOUT; + + /* Only issue a shutdown if we need to */ + if ((state != CARD_STATE_SHUTTING_DOWN) && + (state != CARD_STATE_SHUTDOWN)) { + st = rsxx_issue_card_cmd(card, CARD_CMD_SHUTDOWN); + if (st) + return st; + } + + start = jiffies; + do { + st = rsxx_get_card_state(card, &state); + if (st) + return st; + } while (state != CARD_STATE_SHUTDOWN && + (jiffies - start < timeout)); + + if (state != CARD_STATE_SHUTDOWN) + return -ETIMEDOUT; + + return 0; +} + +/*----------------- Driver Initialization & Setup -------------------*/ +/* Returns: 0 if the driver is compatible with the device + -1 if the driver is NOT compatible with the device */ +static int rsxx_compatibility_check(struct rsxx_cardinfo *card) +{ + unsigned char pci_rev; + + pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev); + + if (pci_rev > RS70_PCI_REV_SUPPORTED) + return -1; + return 0; +} + +static int rsxx_pci_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + struct rsxx_cardinfo *card; + int st; + + dev_info(&dev->dev, "PCI-Flash SSD discovered\n"); + + card = kzalloc(sizeof(*card), GFP_KERNEL); + if (!card) + return -ENOMEM; + + card->dev = dev; + pci_set_drvdata(dev, card); + + do { + if (!ida_pre_get(&rsxx_disk_ida, GFP_KERNEL)) { + st = -ENOMEM; + goto failed_ida_get; + } + + spin_lock(&rsxx_ida_lock); + st = ida_get_new(&rsxx_disk_ida, &card->disk_id); + spin_unlock(&rsxx_ida_lock); + } while (st == -EAGAIN); + + if (st) + goto failed_ida_get; + + st = pci_enable_device(dev); + if (st) + goto failed_enable; + + pci_set_master(dev); + pci_set_dma_max_seg_size(dev, RSXX_HW_BLK_SIZE); + + st = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); + if (st) { + dev_err(CARD_TO_DEV(card), + "No usable DMA configuration,aborting\n"); + goto failed_dma_mask; + } + + st = pci_request_regions(dev, DRIVER_NAME); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed to request memory region\n"); + goto failed_request_regions; + } + + if (pci_resource_len(dev, 0) == 0) { + dev_err(CARD_TO_DEV(card), "BAR0 has length 0!\n"); + st = -ENOMEM; + goto failed_iomap; + } + + card->regmap = pci_iomap(dev, 0, 0); + if (!card->regmap) { + dev_err(CARD_TO_DEV(card), "Failed to map BAR0\n"); + st = -ENOMEM; + goto failed_iomap; + } + + spin_lock_init(&card->irq_lock); + card->halt = 0; + + spin_lock_irq(&card->irq_lock); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irq(&card->irq_lock); + + if (!force_legacy) { + st = pci_enable_msi(dev); + if (st) + dev_warn(CARD_TO_DEV(card), + "Failed to enable MSI\n"); + } + + st = request_irq(dev->irq, rsxx_isr, IRQF_DISABLED | IRQF_SHARED, + DRIVER_NAME, card); + if (st) { + dev_err(CARD_TO_DEV(card), + "Failed requesting IRQ%d\n", dev->irq); + goto failed_irq; + } + + /************* Setup Processor Command Interface *************/ + rsxx_creg_setup(card); + + spin_lock_irq(&card->irq_lock); + rsxx_enable_ier_and_isr(card, CR_INTR_CREG); + spin_unlock_irq(&card->irq_lock); + + st = rsxx_compatibility_check(card); + if (st) { + dev_warn(CARD_TO_DEV(card), + "Incompatible driver detected. Please update the driver.\n"); + st = -EINVAL; + goto failed_compatiblity_check; + } + + /************* Load Card Config *************/ + st = rsxx_load_config(card); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed loading card config\n"); + + /************* Setup DMA Engine *************/ + st = rsxx_get_num_targets(card, &card->n_targets); + if (st) + dev_info(CARD_TO_DEV(card), + "Failed reading the number of DMA targets\n"); + + card->ctrl = kzalloc(card->n_targets * sizeof(*card->ctrl), GFP_KERNEL); + if (!card->ctrl) { + st = -ENOMEM; + goto failed_dma_setup; + } + + st = rsxx_dma_setup(card); + if (st) { + dev_info(CARD_TO_DEV(card), + "Failed to setup DMA engine\n"); + goto failed_dma_setup; + } + + /************* Setup Card Event Handler *************/ + INIT_WORK(&card->event_work, card_event_handler); + + st = rsxx_setup_dev(card); + if (st) + goto failed_create_dev; + + rsxx_get_card_state(card, &card->state); + + dev_info(CARD_TO_DEV(card), + "card state: %s\n", + rsxx_card_state_to_str(card->state)); + + /* + * Now that the DMA Engine and devices have been setup, + * we can enable the event interrupt(it kicks off actions in + * those layers so we couldn't enable it right away.) + */ + spin_lock_irq(&card->irq_lock); + rsxx_enable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irq(&card->irq_lock); + + if (card->state == CARD_STATE_SHUTDOWN) { + st = rsxx_issue_card_cmd(card, CARD_CMD_STARTUP); + if (st) + dev_crit(CARD_TO_DEV(card), + "Failed issuing card startup\n"); + } else if (card->state == CARD_STATE_GOOD || + card->state == CARD_STATE_RD_ONLY_FAULT) { + st = rsxx_get_card_size8(card, &card->size8); + if (st) + card->size8 = 0; + } + + rsxx_attach_dev(card); + + return 0; + +failed_create_dev: + rsxx_dma_destroy(card); +failed_dma_setup: +failed_compatiblity_check: + spin_lock_irq(&card->irq_lock); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irq(&card->irq_lock); + free_irq(dev->irq, card); + if (!force_legacy) + pci_disable_msi(dev); +failed_irq: + pci_iounmap(dev, card->regmap); +failed_iomap: + pci_release_regions(dev); +failed_request_regions: +failed_dma_mask: + pci_disable_device(dev); +failed_enable: + spin_lock(&rsxx_ida_lock); + ida_remove(&rsxx_disk_ida, card->disk_id); + spin_unlock(&rsxx_ida_lock); +failed_ida_get: + kfree(card); + + return st; +} + +static void rsxx_pci_remove(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int st; + int i; + + if (!card) + return; + + dev_info(CARD_TO_DEV(card), + "Removing PCI-Flash SSD.\n"); + + rsxx_detach_dev(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + st = card_shutdown(card); + if (st) + dev_crit(CARD_TO_DEV(card), "Shutdown failed!\n"); + + /* Sync outstanding event handlers. */ + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + /* Prevent work_structs from re-queuing themselves. */ + card->halt = 1; + + cancel_work_sync(&card->event_work); + + rsxx_destroy_dev(card); + rsxx_dma_destroy(card); + + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_ALL); + spin_unlock_irqrestore(&card->irq_lock, flags); + free_irq(dev->irq, card); + + if (!force_legacy) + pci_disable_msi(dev); + + rsxx_creg_destroy(card); + + pci_iounmap(dev, card->regmap); + + pci_disable_device(dev); + pci_release_regions(dev); + + kfree(card); +} + +static int rsxx_pci_suspend(struct pci_dev *dev, pm_message_t state) +{ + /* We don't support suspend at this time. */ + return -ENOSYS; +} + +static void rsxx_pci_shutdown(struct pci_dev *dev) +{ + struct rsxx_cardinfo *card = pci_get_drvdata(dev); + unsigned long flags; + int i; + + if (!card) + return; + + dev_info(CARD_TO_DEV(card), "Shutting down PCI-Flash SSD.\n"); + + rsxx_detach_dev(card); + + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + card_shutdown(card); +} + +static DEFINE_PCI_DEVICE_TABLE(rsxx_pci_ids) = { + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS70D_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS80_FLASH)}, + {PCI_DEVICE(PCI_VENDOR_ID_TMS_IBM, PCI_DEVICE_ID_RS81_FLASH)}, + {0,}, +}; + +MODULE_DEVICE_TABLE(pci, rsxx_pci_ids); + +static struct pci_driver rsxx_pci_driver = { + .name = DRIVER_NAME, + .id_table = rsxx_pci_ids, + .probe = rsxx_pci_probe, + .remove = rsxx_pci_remove, + .suspend = rsxx_pci_suspend, + .shutdown = rsxx_pci_shutdown, +}; + +static int __init rsxx_core_init(void) +{ + int st; + + st = rsxx_dev_init(); + if (st) + return st; + + st = rsxx_dma_init(); + if (st) + goto dma_init_failed; + + st = rsxx_creg_init(); + if (st) + goto creg_init_failed; + + return pci_register_driver(&rsxx_pci_driver); + +creg_init_failed: + rsxx_dma_cleanup(); +dma_init_failed: + rsxx_dev_cleanup(); + + return st; +} + +static void __exit rsxx_core_cleanup(void) +{ + pci_unregister_driver(&rsxx_pci_driver); + rsxx_creg_cleanup(); + rsxx_dma_cleanup(); + rsxx_dev_cleanup(); +} + +module_init(rsxx_core_init); +module_exit(rsxx_core_cleanup); diff --git a/drivers/block/rsxx/cregs.c b/drivers/block/rsxx/cregs.c new file mode 100644 index 00000000000..80bbe639fcc --- /dev/null +++ b/drivers/block/rsxx/cregs.c @@ -0,0 +1,758 @@ +/* +* Filename: cregs.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/completion.h> +#include <linux/slab.h> + +#include "rsxx_priv.h" + +#define CREG_TIMEOUT_MSEC 10000 + +typedef void (*creg_cmd_cb)(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st); + +struct creg_cmd { + struct list_head list; + creg_cmd_cb cb; + void *cb_private; + unsigned int op; + unsigned int addr; + int cnt8; + void *buf; + unsigned int stream; + unsigned int status; +}; + +static struct kmem_cache *creg_cmd_pool; + + +/*------------ Private Functions --------------*/ + +#if defined(__LITTLE_ENDIAN) +#define LITTLE_ENDIAN 1 +#elif defined(__BIG_ENDIAN) +#define LITTLE_ENDIAN 0 +#else +#error Unknown endianess!!! Aborting... +#endif + +static void copy_to_creg_data(struct rsxx_cardinfo *card, + int cnt8, + void *buf, + unsigned int stream) +{ + int i = 0; + u32 *data = buf; + + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { + /* + * Firmware implementation makes it necessary to byte swap on + * little endian processors. + */ + if (LITTLE_ENDIAN && stream) + iowrite32be(data[i], card->regmap + CREG_DATA(i)); + else + iowrite32(data[i], card->regmap + CREG_DATA(i)); + } +} + + +static void copy_from_creg_data(struct rsxx_cardinfo *card, + int cnt8, + void *buf, + unsigned int stream) +{ + int i = 0; + u32 *data = buf; + + for (i = 0; cnt8 > 0; i++, cnt8 -= 4) { + /* + * Firmware implementation makes it necessary to byte swap on + * little endian processors. + */ + if (LITTLE_ENDIAN && stream) + data[i] = ioread32be(card->regmap + CREG_DATA(i)); + else + data[i] = ioread32(card->regmap + CREG_DATA(i)); + } +} + +static struct creg_cmd *pop_active_cmd(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd; + + /* + * Spin lock is needed because this can be called in atomic/interrupt + * context. + */ + spin_lock_bh(&card->creg_ctrl.lock); + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + spin_unlock_bh(&card->creg_ctrl.lock); + + return cmd; +} + +static void creg_issue_cmd(struct rsxx_cardinfo *card, struct creg_cmd *cmd) +{ + iowrite32(cmd->addr, card->regmap + CREG_ADD); + iowrite32(cmd->cnt8, card->regmap + CREG_CNT); + + if (cmd->op == CREG_OP_WRITE) { + if (cmd->buf) + copy_to_creg_data(card, cmd->cnt8, + cmd->buf, cmd->stream); + } + + /* + * Data copy must complete before initiating the command. This is + * needed for weakly ordered processors (i.e. PowerPC), so that all + * neccessary registers are written before we kick the hardware. + */ + wmb(); + + /* Setting the valid bit will kick off the command. */ + iowrite32(cmd->op, card->regmap + CREG_CMD); +} + +static void creg_kick_queue(struct rsxx_cardinfo *card) +{ + if (card->creg_ctrl.active || list_empty(&card->creg_ctrl.queue)) + return; + + card->creg_ctrl.active = 1; + card->creg_ctrl.active_cmd = list_first_entry(&card->creg_ctrl.queue, + struct creg_cmd, list); + list_del(&card->creg_ctrl.active_cmd->list); + card->creg_ctrl.q_depth--; + + /* + * We have to set the timer before we push the new command. Otherwise, + * we could create a race condition that would occur if the timer + * was not canceled, and expired after the new command was pushed, + * but before the command was issued to hardware. + */ + mod_timer(&card->creg_ctrl.cmd_timer, + jiffies + msecs_to_jiffies(CREG_TIMEOUT_MSEC)); + + creg_issue_cmd(card, card->creg_ctrl.active_cmd); +} + +static int creg_queue_cmd(struct rsxx_cardinfo *card, + unsigned int op, + unsigned int addr, + unsigned int cnt8, + void *buf, + int stream, + creg_cmd_cb callback, + void *cb_private) +{ + struct creg_cmd *cmd; + + /* Don't queue stuff up if we're halted. */ + if (unlikely(card->halt)) + return -EINVAL; + + if (card->creg_ctrl.reset) + return -EAGAIN; + + if (cnt8 > MAX_CREG_DATA8) + return -EINVAL; + + cmd = kmem_cache_alloc(creg_cmd_pool, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + INIT_LIST_HEAD(&cmd->list); + + cmd->op = op; + cmd->addr = addr; + cmd->cnt8 = cnt8; + cmd->buf = buf; + cmd->stream = stream; + cmd->cb = callback; + cmd->cb_private = cb_private; + cmd->status = 0; + + spin_lock(&card->creg_ctrl.lock); + list_add_tail(&cmd->list, &card->creg_ctrl.queue); + card->creg_ctrl.q_depth++; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); + + return 0; +} + +static void creg_cmd_timed_out(unsigned long data) +{ + struct rsxx_cardinfo *card = (struct rsxx_cardinfo *) data; + struct creg_cmd *cmd; + + cmd = pop_active_cmd(card); + if (cmd == NULL) { + card->creg_ctrl.creg_stats.creg_timeout++; + dev_warn(CARD_TO_DEV(card), + "No active command associated with timeout!\n"); + return; + } + + if (cmd->cb) + cmd->cb(card, cmd, -ETIMEDOUT); + + kmem_cache_free(creg_cmd_pool, cmd); + + + spin_lock(&card->creg_ctrl.lock); + card->creg_ctrl.active = 0; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); +} + + +static void creg_cmd_done(struct work_struct *work) +{ + struct rsxx_cardinfo *card; + struct creg_cmd *cmd; + int st = 0; + + card = container_of(work, struct rsxx_cardinfo, + creg_ctrl.done_work); + + /* + * The timer could not be cancelled for some reason, + * race to pop the active command. + */ + if (del_timer_sync(&card->creg_ctrl.cmd_timer) == 0) + card->creg_ctrl.creg_stats.failed_cancel_timer++; + + cmd = pop_active_cmd(card); + if (cmd == NULL) { + dev_err(CARD_TO_DEV(card), + "Spurious creg interrupt!\n"); + return; + } + + card->creg_ctrl.creg_stats.stat = ioread32(card->regmap + CREG_STAT); + cmd->status = card->creg_ctrl.creg_stats.stat; + if ((cmd->status & CREG_STAT_STATUS_MASK) == 0) { + dev_err(CARD_TO_DEV(card), + "Invalid status on creg command\n"); + /* + * At this point we're probably reading garbage from HW. Don't + * do anything else that could mess up the system and let + * the sync function return an error. + */ + st = -EIO; + goto creg_done; + } else if (cmd->status & CREG_STAT_ERROR) { + st = -EIO; + } + + if ((cmd->op == CREG_OP_READ)) { + unsigned int cnt8 = ioread32(card->regmap + CREG_CNT); + + /* Paranoid Sanity Checks */ + if (!cmd->buf) { + dev_err(CARD_TO_DEV(card), + "Buffer not given for read.\n"); + st = -EIO; + goto creg_done; + } + if (cnt8 != cmd->cnt8) { + dev_err(CARD_TO_DEV(card), + "count mismatch\n"); + st = -EIO; + goto creg_done; + } + + copy_from_creg_data(card, cnt8, cmd->buf, cmd->stream); + } + +creg_done: + if (cmd->cb) + cmd->cb(card, cmd, st); + + kmem_cache_free(creg_cmd_pool, cmd); + + spin_lock(&card->creg_ctrl.lock); + card->creg_ctrl.active = 0; + creg_kick_queue(card); + spin_unlock(&card->creg_ctrl.lock); +} + +static void creg_reset(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd = NULL; + struct creg_cmd *tmp; + unsigned long flags; + + /* + * mutex_trylock is used here because if reset_lock is taken then a + * reset is already happening. So, we can just go ahead and return. + */ + if (!mutex_trylock(&card->creg_ctrl.reset_lock)) + return; + + card->creg_ctrl.reset = 1; + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_disable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + dev_warn(CARD_TO_DEV(card), + "Resetting creg interface for recovery\n"); + + /* Cancel outstanding commands */ + spin_lock(&card->creg_ctrl.lock); + list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { + list_del(&cmd->list); + card->creg_ctrl.q_depth--; + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + } + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + if (cmd) { + if (timer_pending(&card->creg_ctrl.cmd_timer)) + del_timer_sync(&card->creg_ctrl.cmd_timer); + + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + + card->creg_ctrl.active = 0; + } + spin_unlock(&card->creg_ctrl.lock); + + card->creg_ctrl.reset = 0; + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_CREG | CR_INTR_EVENT); + spin_unlock_irqrestore(&card->irq_lock, flags); + + mutex_unlock(&card->creg_ctrl.reset_lock); +} + +/* Used for synchronous accesses */ +struct creg_completion { + struct completion *cmd_done; + int st; + u32 creg_status; +}; + +static void creg_cmd_done_cb(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st) +{ + struct creg_completion *cmd_completion; + + cmd_completion = cmd->cb_private; + BUG_ON(!cmd_completion); + + cmd_completion->st = st; + cmd_completion->creg_status = cmd->status; + complete(cmd_completion->cmd_done); +} + +static int __issue_creg_rw(struct rsxx_cardinfo *card, + unsigned int op, + unsigned int addr, + unsigned int cnt8, + void *buf, + int stream, + unsigned int *hw_stat) +{ + DECLARE_COMPLETION_ONSTACK(cmd_done); + struct creg_completion completion; + unsigned long timeout; + int st; + + completion.cmd_done = &cmd_done; + completion.st = 0; + completion.creg_status = 0; + + st = creg_queue_cmd(card, op, addr, cnt8, buf, stream, creg_cmd_done_cb, + &completion); + if (st) + return st; + + /* + * This timeout is neccessary for unresponsive hardware. The additional + * 20 seconds to used to guarantee that each cregs requests has time to + * complete. + */ + timeout = msecs_to_jiffies((CREG_TIMEOUT_MSEC * + card->creg_ctrl.q_depth) + 20000); + + /* + * The creg interface is guaranteed to complete. It has a timeout + * mechanism that will kick in if hardware does not respond. + */ + st = wait_for_completion_timeout(completion.cmd_done, timeout); + if (st == 0) { + /* + * This is really bad, because the kernel timer did not + * expire and notify us of a timeout! + */ + dev_crit(CARD_TO_DEV(card), + "cregs timer failed\n"); + creg_reset(card); + return -EIO; + } + + *hw_stat = completion.creg_status; + + if (completion.st) { + dev_warn(CARD_TO_DEV(card), + "creg command failed(%d x%08x)\n", + completion.st, addr); + return completion.st; + } + + return 0; +} + +static int issue_creg_rw(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int stream, + int read) +{ + unsigned int hw_stat; + unsigned int xfer; + unsigned int op; + int st; + + op = read ? CREG_OP_READ : CREG_OP_WRITE; + + do { + xfer = min_t(unsigned int, size8, MAX_CREG_DATA8); + + st = __issue_creg_rw(card, op, addr, xfer, + data, stream, &hw_stat); + if (st) + return st; + + data = (char *)data + xfer; + addr += xfer; + size8 -= xfer; + } while (size8); + + return 0; +} + +/* ---------------------------- Public API ---------------------------------- */ +int rsxx_creg_write(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream) +{ + return issue_creg_rw(card, addr, size8, data, byte_stream, 0); +} + +int rsxx_creg_read(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream) +{ + return issue_creg_rw(card, addr, size8, data, byte_stream, 1); +} + +int rsxx_get_card_state(struct rsxx_cardinfo *card, unsigned int *state) +{ + return rsxx_creg_read(card, CREG_ADD_CARD_STATE, + sizeof(*state), state, 0); +} + +int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8) +{ + unsigned int size; + int st; + + st = rsxx_creg_read(card, CREG_ADD_CARD_SIZE, + sizeof(size), &size, 0); + if (st) + return st; + + *size8 = (u64)size * RSXX_HW_BLK_SIZE; + return 0; +} + +int rsxx_get_num_targets(struct rsxx_cardinfo *card, + unsigned int *n_targets) +{ + return rsxx_creg_read(card, CREG_ADD_NUM_TARGETS, + sizeof(*n_targets), n_targets, 0); +} + +int rsxx_get_card_capabilities(struct rsxx_cardinfo *card, + u32 *capabilities) +{ + return rsxx_creg_read(card, CREG_ADD_CAPABILITIES, + sizeof(*capabilities), capabilities, 0); +} + +int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd) +{ + return rsxx_creg_write(card, CREG_ADD_CARD_CMD, + sizeof(cmd), &cmd, 0); +} + + +/*----------------- HW Log Functions -------------------*/ +static void hw_log_msg(struct rsxx_cardinfo *card, const char *str, int len) +{ + static char level; + + /* + * New messages start with "<#>", where # is the log level. Messages + * that extend past the log buffer will use the previous level + */ + if ((len > 3) && (str[0] == '<') && (str[2] == '>')) { + level = str[1]; + str += 3; /* Skip past the log level. */ + len -= 3; + } + + switch (level) { + case '0': + dev_emerg(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '1': + dev_alert(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '2': + dev_crit(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '3': + dev_err(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '4': + dev_warn(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '5': + dev_notice(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '6': + dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + case '7': + dev_dbg(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + default: + dev_info(CARD_TO_DEV(card), "HW: %.*s", len, str); + break; + } +} + +/* + * The substrncpy function copies the src string (which includes the + * terminating '\0' character), up to the count into the dest pointer. + * Returns the number of bytes copied to dest. + */ +static int substrncpy(char *dest, const char *src, int count) +{ + int max_cnt = count; + + while (count) { + count--; + *dest = *src; + if (*dest == '\0') + break; + src++; + dest++; + } + return max_cnt - count; +} + + +static void read_hw_log_done(struct rsxx_cardinfo *card, + struct creg_cmd *cmd, + int st) +{ + char *buf; + char *log_str; + int cnt; + int len; + int off; + + buf = cmd->buf; + off = 0; + + /* Failed getting the log message */ + if (st) + return; + + while (off < cmd->cnt8) { + log_str = &card->log.buf[card->log.buf_len]; + cnt = min(cmd->cnt8 - off, LOG_BUF_SIZE8 - card->log.buf_len); + len = substrncpy(log_str, &buf[off], cnt); + + off += len; + card->log.buf_len += len; + + /* + * Flush the log if we've hit the end of a message or if we've + * run out of buffer space. + */ + if ((log_str[len - 1] == '\0') || + (card->log.buf_len == LOG_BUF_SIZE8)) { + if (card->log.buf_len != 1) /* Don't log blank lines. */ + hw_log_msg(card, card->log.buf, + card->log.buf_len); + card->log.buf_len = 0; + } + + } + + if (cmd->status & CREG_STAT_LOG_PENDING) + rsxx_read_hw_log(card); +} + +int rsxx_read_hw_log(struct rsxx_cardinfo *card) +{ + int st; + + st = creg_queue_cmd(card, CREG_OP_READ, CREG_ADD_LOG, + sizeof(card->log.tmp), card->log.tmp, + 1, read_hw_log_done, NULL); + if (st) + dev_err(CARD_TO_DEV(card), + "Failed getting log text\n"); + + return st; +} + +/*-------------- IOCTL REG Access ------------------*/ +static int issue_reg_cmd(struct rsxx_cardinfo *card, + struct rsxx_reg_access *cmd, + int read) +{ + unsigned int op = read ? CREG_OP_READ : CREG_OP_WRITE; + + return __issue_creg_rw(card, op, cmd->addr, cmd->cnt, cmd->data, + cmd->stream, &cmd->stat); +} + +int rsxx_reg_access(struct rsxx_cardinfo *card, + struct rsxx_reg_access __user *ucmd, + int read) +{ + struct rsxx_reg_access cmd; + int st; + + st = copy_from_user(&cmd, ucmd, sizeof(cmd)); + if (st) + return -EFAULT; + + if (cmd.cnt > RSXX_MAX_REG_CNT) + return -EFAULT; + + st = issue_reg_cmd(card, &cmd, read); + if (st) + return st; + + st = put_user(cmd.stat, &ucmd->stat); + if (st) + return -EFAULT; + + if (read) { + st = copy_to_user(ucmd->data, cmd.data, cmd.cnt); + if (st) + return -EFAULT; + } + + return 0; +} + +/*------------ Initialization & Setup --------------*/ +int rsxx_creg_setup(struct rsxx_cardinfo *card) +{ + card->creg_ctrl.active_cmd = NULL; + + INIT_WORK(&card->creg_ctrl.done_work, creg_cmd_done); + mutex_init(&card->creg_ctrl.reset_lock); + INIT_LIST_HEAD(&card->creg_ctrl.queue); + spin_lock_init(&card->creg_ctrl.lock); + setup_timer(&card->creg_ctrl.cmd_timer, creg_cmd_timed_out, + (unsigned long) card); + + return 0; +} + +void rsxx_creg_destroy(struct rsxx_cardinfo *card) +{ + struct creg_cmd *cmd; + struct creg_cmd *tmp; + int cnt = 0; + + /* Cancel outstanding commands */ + spin_lock(&card->creg_ctrl.lock); + list_for_each_entry_safe(cmd, tmp, &card->creg_ctrl.queue, list) { + list_del(&cmd->list); + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + kmem_cache_free(creg_cmd_pool, cmd); + cnt++; + } + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Canceled %d queue creg commands\n", cnt); + + cmd = card->creg_ctrl.active_cmd; + card->creg_ctrl.active_cmd = NULL; + if (cmd) { + if (timer_pending(&card->creg_ctrl.cmd_timer)) + del_timer_sync(&card->creg_ctrl.cmd_timer); + + if (cmd->cb) + cmd->cb(card, cmd, -ECANCELED); + dev_info(CARD_TO_DEV(card), + "Canceled active creg command\n"); + kmem_cache_free(creg_cmd_pool, cmd); + } + spin_unlock(&card->creg_ctrl.lock); + + cancel_work_sync(&card->creg_ctrl.done_work); +} + + +int rsxx_creg_init(void) +{ + creg_cmd_pool = KMEM_CACHE(creg_cmd, SLAB_HWCACHE_ALIGN); + if (!creg_cmd_pool) + return -ENOMEM; + + return 0; +} + +void rsxx_creg_cleanup(void) +{ + kmem_cache_destroy(creg_cmd_pool); +} diff --git a/drivers/block/rsxx/dev.c b/drivers/block/rsxx/dev.c new file mode 100644 index 00000000000..4346d17d294 --- /dev/null +++ b/drivers/block/rsxx/dev.c @@ -0,0 +1,367 @@ +/* +* Filename: dev.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/slab.h> + +#include <linux/hdreg.h> +#include <linux/genhd.h> +#include <linux/blkdev.h> +#include <linux/bio.h> + +#include <linux/fs.h> + +#include "rsxx_priv.h" + +static unsigned int blkdev_minors = 64; +module_param(blkdev_minors, uint, 0444); +MODULE_PARM_DESC(blkdev_minors, "Number of minors(partitions)"); + +/* + * For now I'm making this tweakable in case any applications hit this limit. + * If you see a "bio too big" error in the log you will need to raise this + * value. + */ +static unsigned int blkdev_max_hw_sectors = 1024; +module_param(blkdev_max_hw_sectors, uint, 0444); +MODULE_PARM_DESC(blkdev_max_hw_sectors, "Max hw sectors for a single BIO"); + +static unsigned int enable_blkdev = 1; +module_param(enable_blkdev , uint, 0444); +MODULE_PARM_DESC(enable_blkdev, "Enable block device interfaces"); + + +struct rsxx_bio_meta { + struct bio *bio; + atomic_t pending_dmas; + atomic_t error; + unsigned long start_time; +}; + +static struct kmem_cache *bio_meta_pool; + +/*----------------- Block Device Operations -----------------*/ +static int rsxx_blkdev_ioctl(struct block_device *bdev, + fmode_t mode, + unsigned int cmd, + unsigned long arg) +{ + struct rsxx_cardinfo *card = bdev->bd_disk->private_data; + + switch (cmd) { + case RSXX_GETREG: + return rsxx_reg_access(card, (void __user *)arg, 1); + case RSXX_SETREG: + return rsxx_reg_access(card, (void __user *)arg, 0); + } + + return -ENOTTY; +} + +static int rsxx_getgeo(struct block_device *bdev, struct hd_geometry *geo) +{ + struct rsxx_cardinfo *card = bdev->bd_disk->private_data; + u64 blocks = card->size8 >> 9; + + /* + * get geometry: Fake it. I haven't found any drivers that set + * geo->start, so we won't either. + */ + if (card->size8) { + geo->heads = 64; + geo->sectors = 16; + do_div(blocks, (geo->heads * geo->sectors)); + geo->cylinders = blocks; + } else { + geo->heads = 0; + geo->sectors = 0; + geo->cylinders = 0; + } + return 0; +} + +static const struct block_device_operations rsxx_fops = { + .owner = THIS_MODULE, + .getgeo = rsxx_getgeo, + .ioctl = rsxx_blkdev_ioctl, +}; + +static void disk_stats_start(struct rsxx_cardinfo *card, struct bio *bio) +{ + struct hd_struct *part0 = &card->gendisk->part0; + int rw = bio_data_dir(bio); + int cpu; + + cpu = part_stat_lock(); + + part_round_stats(cpu, part0); + part_inc_in_flight(part0, rw); + + part_stat_unlock(); +} + +static void disk_stats_complete(struct rsxx_cardinfo *card, + struct bio *bio, + unsigned long start_time) +{ + struct hd_struct *part0 = &card->gendisk->part0; + unsigned long duration = jiffies - start_time; + int rw = bio_data_dir(bio); + int cpu; + + cpu = part_stat_lock(); + + part_stat_add(cpu, part0, sectors[rw], bio_sectors(bio)); + part_stat_inc(cpu, part0, ios[rw]); + part_stat_add(cpu, part0, ticks[rw], duration); + + part_round_stats(cpu, part0); + part_dec_in_flight(part0, rw); + + part_stat_unlock(); +} + +static void bio_dma_done_cb(struct rsxx_cardinfo *card, + void *cb_data, + unsigned int error) +{ + struct rsxx_bio_meta *meta = cb_data; + + if (error) + atomic_set(&meta->error, 1); + + if (atomic_dec_and_test(&meta->pending_dmas)) { + disk_stats_complete(card, meta->bio, meta->start_time); + + bio_endio(meta->bio, atomic_read(&meta->error) ? -EIO : 0); + kmem_cache_free(bio_meta_pool, meta); + } +} + +static void rsxx_make_request(struct request_queue *q, struct bio *bio) +{ + struct rsxx_cardinfo *card = q->queuedata; + struct rsxx_bio_meta *bio_meta; + int st = -EINVAL; + + might_sleep(); + + if (unlikely(card->halt)) { + st = -EFAULT; + goto req_err; + } + + if (unlikely(card->dma_fault)) { + st = (-EFAULT); + goto req_err; + } + + if (bio->bi_size == 0) { + dev_err(CARD_TO_DEV(card), "size zero BIO!\n"); + goto req_err; + } + + bio_meta = kmem_cache_alloc(bio_meta_pool, GFP_KERNEL); + if (!bio_meta) { + st = -ENOMEM; + goto req_err; + } + + bio_meta->bio = bio; + atomic_set(&bio_meta->error, 0); + atomic_set(&bio_meta->pending_dmas, 0); + bio_meta->start_time = jiffies; + + disk_stats_start(card, bio); + + dev_dbg(CARD_TO_DEV(card), "BIO[%c]: meta: %p addr8: x%llx size: %d\n", + bio_data_dir(bio) ? 'W' : 'R', bio_meta, + (u64)bio->bi_sector << 9, bio->bi_size); + + st = rsxx_dma_queue_bio(card, bio, &bio_meta->pending_dmas, + bio_dma_done_cb, bio_meta); + if (st) + goto queue_err; + + return; + +queue_err: + kmem_cache_free(bio_meta_pool, bio_meta); +req_err: + bio_endio(bio, st); +} + +/*----------------- Device Setup -------------------*/ +static bool rsxx_discard_supported(struct rsxx_cardinfo *card) +{ + unsigned char pci_rev; + + pci_read_config_byte(card->dev, PCI_REVISION_ID, &pci_rev); + + return (pci_rev >= RSXX_DISCARD_SUPPORT); +} + +static unsigned short rsxx_get_logical_block_size( + struct rsxx_cardinfo *card) +{ + u32 capabilities = 0; + int st; + + st = rsxx_get_card_capabilities(card, &capabilities); + if (st) + dev_warn(CARD_TO_DEV(card), + "Failed reading card capabilities register\n"); + + /* Earlier firmware did not have support for 512 byte accesses */ + if (capabilities & CARD_CAP_SUBPAGE_WRITES) + return 512; + else + return RSXX_HW_BLK_SIZE; +} + +int rsxx_attach_dev(struct rsxx_cardinfo *card) +{ + mutex_lock(&card->dev_lock); + + /* The block device requires the stripe size from the config. */ + if (enable_blkdev) { + if (card->config_valid) + set_capacity(card->gendisk, card->size8 >> 9); + else + set_capacity(card->gendisk, 0); + add_disk(card->gendisk); + + card->bdev_attached = 1; + } + + mutex_unlock(&card->dev_lock); + + return 0; +} + +void rsxx_detach_dev(struct rsxx_cardinfo *card) +{ + mutex_lock(&card->dev_lock); + + if (card->bdev_attached) { + del_gendisk(card->gendisk); + card->bdev_attached = 0; + } + + mutex_unlock(&card->dev_lock); +} + +int rsxx_setup_dev(struct rsxx_cardinfo *card) +{ + unsigned short blk_size; + + mutex_init(&card->dev_lock); + + if (!enable_blkdev) + return 0; + + card->major = register_blkdev(0, DRIVER_NAME); + if (card->major < 0) { + dev_err(CARD_TO_DEV(card), "Failed to get major number\n"); + return -ENOMEM; + } + + card->queue = blk_alloc_queue(GFP_KERNEL); + if (!card->queue) { + dev_err(CARD_TO_DEV(card), "Failed queue alloc\n"); + unregister_blkdev(card->major, DRIVER_NAME); + return -ENOMEM; + } + + card->gendisk = alloc_disk(blkdev_minors); + if (!card->gendisk) { + dev_err(CARD_TO_DEV(card), "Failed disk alloc\n"); + blk_cleanup_queue(card->queue); + unregister_blkdev(card->major, DRIVER_NAME); + return -ENOMEM; + } + + blk_size = rsxx_get_logical_block_size(card); + + blk_queue_make_request(card->queue, rsxx_make_request); + blk_queue_bounce_limit(card->queue, BLK_BOUNCE_ANY); + blk_queue_dma_alignment(card->queue, blk_size - 1); + blk_queue_max_hw_sectors(card->queue, blkdev_max_hw_sectors); + blk_queue_logical_block_size(card->queue, blk_size); + blk_queue_physical_block_size(card->queue, RSXX_HW_BLK_SIZE); + + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, card->queue); + if (rsxx_discard_supported(card)) { + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, card->queue); + blk_queue_max_discard_sectors(card->queue, + RSXX_HW_BLK_SIZE >> 9); + card->queue->limits.discard_granularity = RSXX_HW_BLK_SIZE; + card->queue->limits.discard_alignment = RSXX_HW_BLK_SIZE; + card->queue->limits.discard_zeroes_data = 1; + } + + card->queue->queuedata = card; + + snprintf(card->gendisk->disk_name, sizeof(card->gendisk->disk_name), + "rsxx%d", card->disk_id); + card->gendisk->driverfs_dev = &card->dev->dev; + card->gendisk->major = card->major; + card->gendisk->first_minor = 0; + card->gendisk->fops = &rsxx_fops; + card->gendisk->private_data = card; + card->gendisk->queue = card->queue; + + return 0; +} + +void rsxx_destroy_dev(struct rsxx_cardinfo *card) +{ + if (!enable_blkdev) + return; + + put_disk(card->gendisk); + card->gendisk = NULL; + + blk_cleanup_queue(card->queue); + unregister_blkdev(card->major, DRIVER_NAME); +} + +int rsxx_dev_init(void) +{ + bio_meta_pool = KMEM_CACHE(rsxx_bio_meta, SLAB_HWCACHE_ALIGN); + if (!bio_meta_pool) + return -ENOMEM; + + return 0; +} + +void rsxx_dev_cleanup(void) +{ + kmem_cache_destroy(bio_meta_pool); +} + + diff --git a/drivers/block/rsxx/dma.c b/drivers/block/rsxx/dma.c new file mode 100644 index 00000000000..63176e67662 --- /dev/null +++ b/drivers/block/rsxx/dma.c @@ -0,0 +1,998 @@ +/* +* Filename: dma.c +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#include <linux/slab.h> +#include "rsxx_priv.h" + +struct rsxx_dma { + struct list_head list; + u8 cmd; + unsigned int laddr; /* Logical address on the ramsan */ + struct { + u32 off; + u32 cnt; + } sub_page; + dma_addr_t dma_addr; + struct page *page; + unsigned int pg_off; /* Page Offset */ + rsxx_dma_cb cb; + void *cb_data; +}; + +/* This timeout is used to detect a stalled DMA channel */ +#define DMA_ACTIVITY_TIMEOUT msecs_to_jiffies(10000) + +struct hw_status { + u8 status; + u8 tag; + __le16 count; + __le32 _rsvd2; + __le64 _rsvd3; +} __packed; + +enum rsxx_dma_status { + DMA_SW_ERR = 0x1, + DMA_HW_FAULT = 0x2, + DMA_CANCELLED = 0x4, +}; + +struct hw_cmd { + u8 command; + u8 tag; + u8 _rsvd; + u8 sub_page; /* Bit[0:2]: 512byte offset */ + /* Bit[4:6]: 512byte count */ + __le32 device_addr; + __le64 host_addr; +} __packed; + +enum rsxx_hw_cmd { + HW_CMD_BLK_DISCARD = 0x70, + HW_CMD_BLK_WRITE = 0x80, + HW_CMD_BLK_READ = 0xC0, + HW_CMD_BLK_RECON_READ = 0xE0, +}; + +enum rsxx_hw_status { + HW_STATUS_CRC = 0x01, + HW_STATUS_HARD_ERR = 0x02, + HW_STATUS_SOFT_ERR = 0x04, + HW_STATUS_FAULT = 0x08, +}; + +#define STATUS_BUFFER_SIZE8 4096 +#define COMMAND_BUFFER_SIZE8 4096 + +static struct kmem_cache *rsxx_dma_pool; + +struct dma_tracker { + int next_tag; + struct rsxx_dma *dma; +}; + +#define DMA_TRACKER_LIST_SIZE8 (sizeof(struct dma_tracker_list) + \ + (sizeof(struct dma_tracker) * RSXX_MAX_OUTSTANDING_CMDS)) + +struct dma_tracker_list { + spinlock_t lock; + int head; + struct dma_tracker list[0]; +}; + + +/*----------------- Misc Utility Functions -------------------*/ +static unsigned int rsxx_addr8_to_laddr(u64 addr8, struct rsxx_cardinfo *card) +{ + unsigned long long tgt_addr8; + + tgt_addr8 = ((addr8 >> card->_stripe.upper_shift) & + card->_stripe.upper_mask) | + ((addr8) & card->_stripe.lower_mask); + do_div(tgt_addr8, RSXX_HW_BLK_SIZE); + return tgt_addr8; +} + +static unsigned int rsxx_get_dma_tgt(struct rsxx_cardinfo *card, u64 addr8) +{ + unsigned int tgt; + + tgt = (addr8 >> card->_stripe.target_shift) & card->_stripe.target_mask; + + return tgt; +} + +static void rsxx_dma_queue_reset(struct rsxx_cardinfo *card) +{ + /* Reset all DMA Command/Status Queues */ + iowrite32(DMA_QUEUE_RESET, card->regmap + RESET); +} + +static unsigned int get_dma_size(struct rsxx_dma *dma) +{ + if (dma->sub_page.cnt) + return dma->sub_page.cnt << 9; + else + return RSXX_HW_BLK_SIZE; +} + + +/*----------------- DMA Tracker -------------------*/ +static void set_tracker_dma(struct dma_tracker_list *trackers, + int tag, + struct rsxx_dma *dma) +{ + trackers->list[tag].dma = dma; +} + +static struct rsxx_dma *get_tracker_dma(struct dma_tracker_list *trackers, + int tag) +{ + return trackers->list[tag].dma; +} + +static int pop_tracker(struct dma_tracker_list *trackers) +{ + int tag; + + spin_lock(&trackers->lock); + tag = trackers->head; + if (tag != -1) { + trackers->head = trackers->list[tag].next_tag; + trackers->list[tag].next_tag = -1; + } + spin_unlock(&trackers->lock); + + return tag; +} + +static void push_tracker(struct dma_tracker_list *trackers, int tag) +{ + spin_lock(&trackers->lock); + trackers->list[tag].next_tag = trackers->head; + trackers->head = tag; + trackers->list[tag].dma = NULL; + spin_unlock(&trackers->lock); +} + + +/*----------------- Interrupt Coalescing -------------*/ +/* + * Interrupt Coalescing Register Format: + * Interrupt Timer (64ns units) [15:0] + * Interrupt Count [24:16] + * Reserved [31:25] +*/ +#define INTR_COAL_LATENCY_MASK (0x0000ffff) + +#define INTR_COAL_COUNT_SHIFT 16 +#define INTR_COAL_COUNT_BITS 9 +#define INTR_COAL_COUNT_MASK (((1 << INTR_COAL_COUNT_BITS) - 1) << \ + INTR_COAL_COUNT_SHIFT) +#define INTR_COAL_LATENCY_UNITS_NS 64 + + +static u32 dma_intr_coal_val(u32 mode, u32 count, u32 latency) +{ + u32 latency_units = latency / INTR_COAL_LATENCY_UNITS_NS; + + if (mode == RSXX_INTR_COAL_DISABLED) + return 0; + + return ((count << INTR_COAL_COUNT_SHIFT) & INTR_COAL_COUNT_MASK) | + (latency_units & INTR_COAL_LATENCY_MASK); + +} + +static void dma_intr_coal_auto_tune(struct rsxx_cardinfo *card) +{ + int i; + u32 q_depth = 0; + u32 intr_coal; + + if (card->config.data.intr_coal.mode != RSXX_INTR_COAL_AUTO_TUNE) + return; + + for (i = 0; i < card->n_targets; i++) + q_depth += atomic_read(&card->ctrl[i].stats.hw_q_depth); + + intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode, + q_depth / 2, + card->config.data.intr_coal.latency); + iowrite32(intr_coal, card->regmap + INTR_COAL); +} + +/*----------------- RSXX DMA Handling -------------------*/ +static void rsxx_complete_dma(struct rsxx_cardinfo *card, + struct rsxx_dma *dma, + unsigned int status) +{ + if (status & DMA_SW_ERR) + printk_ratelimited(KERN_ERR + "SW Error in DMA(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + if (status & DMA_HW_FAULT) + printk_ratelimited(KERN_ERR + "HW Fault in DMA(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + if (status & DMA_CANCELLED) + printk_ratelimited(KERN_ERR + "DMA Cancelled(cmd x%02x, laddr x%08x)\n", + dma->cmd, dma->laddr); + + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, get_dma_size(dma), + dma->cmd == HW_CMD_BLK_WRITE ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + + if (dma->cb) + dma->cb(card, dma->cb_data, status ? 1 : 0); + + kmem_cache_free(rsxx_dma_pool, dma); +} + +static void rsxx_requeue_dma(struct rsxx_dma_ctrl *ctrl, + struct rsxx_dma *dma) +{ + /* + * Requeued DMAs go to the front of the queue so they are issued + * first. + */ + spin_lock(&ctrl->queue_lock); + list_add(&dma->list, &ctrl->queue); + spin_unlock(&ctrl->queue_lock); +} + +static void rsxx_handle_dma_error(struct rsxx_dma_ctrl *ctrl, + struct rsxx_dma *dma, + u8 hw_st) +{ + unsigned int status = 0; + int requeue_cmd = 0; + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Handling DMA error(cmd x%02x, laddr x%08x st:x%02x)\n", + dma->cmd, dma->laddr, hw_st); + + if (hw_st & HW_STATUS_CRC) + ctrl->stats.crc_errors++; + if (hw_st & HW_STATUS_HARD_ERR) + ctrl->stats.hard_errors++; + if (hw_st & HW_STATUS_SOFT_ERR) + ctrl->stats.soft_errors++; + + switch (dma->cmd) { + case HW_CMD_BLK_READ: + if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) { + if (ctrl->card->scrub_hard) { + dma->cmd = HW_CMD_BLK_RECON_READ; + requeue_cmd = 1; + ctrl->stats.reads_retried++; + } else { + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + } else if (hw_st & HW_STATUS_FAULT) { + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + + break; + case HW_CMD_BLK_RECON_READ: + if (hw_st & (HW_STATUS_CRC | HW_STATUS_HARD_ERR)) { + /* Data could not be reconstructed. */ + status |= DMA_HW_FAULT; + ctrl->stats.reads_failed++; + } + + break; + case HW_CMD_BLK_WRITE: + status |= DMA_HW_FAULT; + ctrl->stats.writes_failed++; + + break; + case HW_CMD_BLK_DISCARD: + status |= DMA_HW_FAULT; + ctrl->stats.discards_failed++; + + break; + default: + dev_err(CARD_TO_DEV(ctrl->card), + "Unknown command in DMA!(cmd: x%02x " + "laddr x%08x st: x%02x\n", + dma->cmd, dma->laddr, hw_st); + status |= DMA_SW_ERR; + + break; + } + + if (requeue_cmd) + rsxx_requeue_dma(ctrl, dma); + else + rsxx_complete_dma(ctrl->card, dma, status); +} + +static void dma_engine_stalled(unsigned long data) +{ + struct rsxx_dma_ctrl *ctrl = (struct rsxx_dma_ctrl *)data; + + if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + return; + + if (ctrl->cmd.idx != ioread32(ctrl->regmap + SW_CMD_IDX)) { + /* + * The dma engine was stalled because the SW_CMD_IDX write + * was lost. Issue it again to recover. + */ + dev_warn(CARD_TO_DEV(ctrl->card), + "SW_CMD_IDX write was lost, re-writing...\n"); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + } else { + dev_warn(CARD_TO_DEV(ctrl->card), + "DMA channel %d has stalled, faulting interface.\n", + ctrl->id); + ctrl->card->dma_fault = 1; + } +} + +static void rsxx_issue_dmas(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + int tag; + int cmds_pending = 0; + struct hw_cmd *hw_cmd_buf; + + ctrl = container_of(work, struct rsxx_dma_ctrl, issue_dma_work); + hw_cmd_buf = ctrl->cmd.buf; + + if (unlikely(ctrl->card->halt)) + return; + + while (1) { + spin_lock(&ctrl->queue_lock); + if (list_empty(&ctrl->queue)) { + spin_unlock(&ctrl->queue_lock); + break; + } + spin_unlock(&ctrl->queue_lock); + + tag = pop_tracker(ctrl->trackers); + if (tag == -1) + break; + + spin_lock(&ctrl->queue_lock); + dma = list_entry(ctrl->queue.next, struct rsxx_dma, list); + list_del(&dma->list); + ctrl->stats.sw_q_depth--; + spin_unlock(&ctrl->queue_lock); + + /* + * This will catch any DMAs that slipped in right before the + * fault, but was queued after all the other DMAs were + * cancelled. + */ + if (unlikely(ctrl->card->dma_fault)) { + push_tracker(ctrl->trackers, tag); + rsxx_complete_dma(ctrl->card, dma, DMA_CANCELLED); + continue; + } + + set_tracker_dma(ctrl->trackers, tag, dma); + hw_cmd_buf[ctrl->cmd.idx].command = dma->cmd; + hw_cmd_buf[ctrl->cmd.idx].tag = tag; + hw_cmd_buf[ctrl->cmd.idx]._rsvd = 0; + hw_cmd_buf[ctrl->cmd.idx].sub_page = + ((dma->sub_page.cnt & 0x7) << 4) | + (dma->sub_page.off & 0x7); + + hw_cmd_buf[ctrl->cmd.idx].device_addr = + cpu_to_le32(dma->laddr); + + hw_cmd_buf[ctrl->cmd.idx].host_addr = + cpu_to_le64(dma->dma_addr); + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Issue DMA%d(laddr %d tag %d) to idx %d\n", + ctrl->id, dma->laddr, tag, ctrl->cmd.idx); + + ctrl->cmd.idx = (ctrl->cmd.idx + 1) & RSXX_CS_IDX_MASK; + cmds_pending++; + + if (dma->cmd == HW_CMD_BLK_WRITE) + ctrl->stats.writes_issued++; + else if (dma->cmd == HW_CMD_BLK_DISCARD) + ctrl->stats.discards_issued++; + else + ctrl->stats.reads_issued++; + } + + /* Let HW know we've queued commands. */ + if (cmds_pending) { + /* + * We must guarantee that the CPU writes to 'ctrl->cmd.buf' + * (which is in PCI-consistent system-memory) from the loop + * above make it into the coherency domain before the + * following PIO "trigger" updating the cmd.idx. A WMB is + * sufficient. We need not explicitly CPU cache-flush since + * the memory is a PCI-consistent (ie; coherent) mapping. + */ + wmb(); + + atomic_add(cmds_pending, &ctrl->stats.hw_q_depth); + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + } +} + +static void rsxx_dma_done(struct work_struct *work) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + unsigned long flags; + u16 count; + u8 status; + u8 tag; + struct hw_status *hw_st_buf; + + ctrl = container_of(work, struct rsxx_dma_ctrl, dma_done_work); + hw_st_buf = ctrl->status.buf; + + if (unlikely(ctrl->card->halt) || + unlikely(ctrl->card->dma_fault)) + return; + + count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); + + while (count == ctrl->e_cnt) { + /* + * The read memory-barrier is necessary to keep aggressive + * processors/optimizers (such as the PPC Apple G5) from + * reordering the following status-buffer tag & status read + * *before* the count read on subsequent iterations of the + * loop! + */ + rmb(); + + status = hw_st_buf[ctrl->status.idx].status; + tag = hw_st_buf[ctrl->status.idx].tag; + + dma = get_tracker_dma(ctrl->trackers, tag); + if (dma == NULL) { + spin_lock_irqsave(&ctrl->card->irq_lock, flags); + rsxx_disable_ier(ctrl->card, CR_INTR_DMA_ALL); + spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); + + dev_err(CARD_TO_DEV(ctrl->card), + "No tracker for tag %d " + "(idx %d id %d)\n", + tag, ctrl->status.idx, ctrl->id); + return; + } + + dev_dbg(CARD_TO_DEV(ctrl->card), + "Completing DMA%d" + "(laddr x%x tag %d st: x%x cnt: x%04x) from idx %d.\n", + ctrl->id, dma->laddr, tag, status, count, + ctrl->status.idx); + + atomic_dec(&ctrl->stats.hw_q_depth); + + mod_timer(&ctrl->activity_timer, + jiffies + DMA_ACTIVITY_TIMEOUT); + + if (status) + rsxx_handle_dma_error(ctrl, dma, status); + else + rsxx_complete_dma(ctrl->card, dma, 0); + + push_tracker(ctrl->trackers, tag); + + ctrl->status.idx = (ctrl->status.idx + 1) & + RSXX_CS_IDX_MASK; + ctrl->e_cnt++; + + count = le16_to_cpu(hw_st_buf[ctrl->status.idx].count); + } + + dma_intr_coal_auto_tune(ctrl->card); + + if (atomic_read(&ctrl->stats.hw_q_depth) == 0) + del_timer_sync(&ctrl->activity_timer); + + spin_lock_irqsave(&ctrl->card->irq_lock, flags); + rsxx_enable_ier(ctrl->card, CR_INTR_DMA(ctrl->id)); + spin_unlock_irqrestore(&ctrl->card->irq_lock, flags); + + spin_lock(&ctrl->queue_lock); + if (ctrl->stats.sw_q_depth) + queue_work(ctrl->issue_wq, &ctrl->issue_dma_work); + spin_unlock(&ctrl->queue_lock); +} + +static int rsxx_cleanup_dma_queue(struct rsxx_cardinfo *card, + struct list_head *q) +{ + struct rsxx_dma *dma; + struct rsxx_dma *tmp; + int cnt = 0; + + list_for_each_entry_safe(dma, tmp, q, list) { + list_del(&dma->list); + + if (dma->dma_addr) + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + (dma->cmd == HW_CMD_BLK_WRITE) ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + kmem_cache_free(rsxx_dma_pool, dma); + cnt++; + } + + return cnt; +} + +static int rsxx_queue_discard(struct rsxx_cardinfo *card, + struct list_head *q, + unsigned int laddr, + rsxx_dma_cb cb, + void *cb_data) +{ + struct rsxx_dma *dma; + + dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); + if (!dma) + return -ENOMEM; + + dma->cmd = HW_CMD_BLK_DISCARD; + dma->laddr = laddr; + dma->dma_addr = 0; + dma->sub_page.off = 0; + dma->sub_page.cnt = 0; + dma->page = NULL; + dma->pg_off = 0; + dma->cb = cb; + dma->cb_data = cb_data; + + dev_dbg(CARD_TO_DEV(card), "Queuing[D] laddr %x\n", dma->laddr); + + list_add_tail(&dma->list, q); + + return 0; +} + +static int rsxx_queue_dma(struct rsxx_cardinfo *card, + struct list_head *q, + int dir, + unsigned int dma_off, + unsigned int dma_len, + unsigned int laddr, + struct page *page, + unsigned int pg_off, + rsxx_dma_cb cb, + void *cb_data) +{ + struct rsxx_dma *dma; + + dma = kmem_cache_alloc(rsxx_dma_pool, GFP_KERNEL); + if (!dma) + return -ENOMEM; + + dma->dma_addr = pci_map_page(card->dev, page, pg_off, dma_len, + dir ? PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + if (!dma->dma_addr) { + kmem_cache_free(rsxx_dma_pool, dma); + return -ENOMEM; + } + + dma->cmd = dir ? HW_CMD_BLK_WRITE : HW_CMD_BLK_READ; + dma->laddr = laddr; + dma->sub_page.off = (dma_off >> 9); + dma->sub_page.cnt = (dma_len >> 9); + dma->page = page; + dma->pg_off = pg_off; + dma->cb = cb; + dma->cb_data = cb_data; + + dev_dbg(CARD_TO_DEV(card), + "Queuing[%c] laddr %x off %d cnt %d page %p pg_off %d\n", + dir ? 'W' : 'R', dma->laddr, dma->sub_page.off, + dma->sub_page.cnt, dma->page, dma->pg_off); + + /* Queue the DMA */ + list_add_tail(&dma->list, q); + + return 0; +} + +int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, + struct bio *bio, + atomic_t *n_dmas, + rsxx_dma_cb cb, + void *cb_data) +{ + struct list_head dma_list[RSXX_MAX_TARGETS]; + struct bio_vec *bvec; + unsigned long long addr8; + unsigned int laddr; + unsigned int bv_len; + unsigned int bv_off; + unsigned int dma_off; + unsigned int dma_len; + int dma_cnt[RSXX_MAX_TARGETS]; + int tgt; + int st; + int i; + + addr8 = bio->bi_sector << 9; /* sectors are 512 bytes */ + atomic_set(n_dmas, 0); + + for (i = 0; i < card->n_targets; i++) { + INIT_LIST_HEAD(&dma_list[i]); + dma_cnt[i] = 0; + } + + if (bio->bi_rw & REQ_DISCARD) { + bv_len = bio->bi_size; + + while (bv_len > 0) { + tgt = rsxx_get_dma_tgt(card, addr8); + laddr = rsxx_addr8_to_laddr(addr8, card); + + st = rsxx_queue_discard(card, &dma_list[tgt], laddr, + cb, cb_data); + if (st) + goto bvec_err; + + dma_cnt[tgt]++; + atomic_inc(n_dmas); + addr8 += RSXX_HW_BLK_SIZE; + bv_len -= RSXX_HW_BLK_SIZE; + } + } else { + bio_for_each_segment(bvec, bio, i) { + bv_len = bvec->bv_len; + bv_off = bvec->bv_offset; + + while (bv_len > 0) { + tgt = rsxx_get_dma_tgt(card, addr8); + laddr = rsxx_addr8_to_laddr(addr8, card); + dma_off = addr8 & RSXX_HW_BLK_MASK; + dma_len = min(bv_len, + RSXX_HW_BLK_SIZE - dma_off); + + st = rsxx_queue_dma(card, &dma_list[tgt], + bio_data_dir(bio), + dma_off, dma_len, + laddr, bvec->bv_page, + bv_off, cb, cb_data); + if (st) + goto bvec_err; + + dma_cnt[tgt]++; + atomic_inc(n_dmas); + addr8 += dma_len; + bv_off += dma_len; + bv_len -= dma_len; + } + } + } + + for (i = 0; i < card->n_targets; i++) { + if (!list_empty(&dma_list[i])) { + spin_lock(&card->ctrl[i].queue_lock); + card->ctrl[i].stats.sw_q_depth += dma_cnt[i]; + list_splice_tail(&dma_list[i], &card->ctrl[i].queue); + spin_unlock(&card->ctrl[i].queue_lock); + + queue_work(card->ctrl[i].issue_wq, + &card->ctrl[i].issue_dma_work); + } + } + + return 0; + +bvec_err: + for (i = 0; i < card->n_targets; i++) + rsxx_cleanup_dma_queue(card, &dma_list[i]); + + return st; +} + + +/*----------------- DMA Engine Initialization & Setup -------------------*/ +static int rsxx_dma_ctrl_init(struct pci_dev *dev, + struct rsxx_dma_ctrl *ctrl) +{ + int i; + + memset(&ctrl->stats, 0, sizeof(ctrl->stats)); + + ctrl->status.buf = pci_alloc_consistent(dev, STATUS_BUFFER_SIZE8, + &ctrl->status.dma_addr); + ctrl->cmd.buf = pci_alloc_consistent(dev, COMMAND_BUFFER_SIZE8, + &ctrl->cmd.dma_addr); + if (ctrl->status.buf == NULL || ctrl->cmd.buf == NULL) + return -ENOMEM; + + ctrl->trackers = vmalloc(DMA_TRACKER_LIST_SIZE8); + if (!ctrl->trackers) + return -ENOMEM; + + ctrl->trackers->head = 0; + for (i = 0; i < RSXX_MAX_OUTSTANDING_CMDS; i++) { + ctrl->trackers->list[i].next_tag = i + 1; + ctrl->trackers->list[i].dma = NULL; + } + ctrl->trackers->list[RSXX_MAX_OUTSTANDING_CMDS-1].next_tag = -1; + spin_lock_init(&ctrl->trackers->lock); + + spin_lock_init(&ctrl->queue_lock); + INIT_LIST_HEAD(&ctrl->queue); + + setup_timer(&ctrl->activity_timer, dma_engine_stalled, + (unsigned long)ctrl); + + ctrl->issue_wq = alloc_ordered_workqueue(DRIVER_NAME"_issue", 0); + if (!ctrl->issue_wq) + return -ENOMEM; + + ctrl->done_wq = alloc_ordered_workqueue(DRIVER_NAME"_done", 0); + if (!ctrl->done_wq) + return -ENOMEM; + + INIT_WORK(&ctrl->issue_dma_work, rsxx_issue_dmas); + INIT_WORK(&ctrl->dma_done_work, rsxx_dma_done); + + memset(ctrl->status.buf, 0xac, STATUS_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_LO); + iowrite32(upper_32_bits(ctrl->status.dma_addr), + ctrl->regmap + SB_ADD_HI); + + memset(ctrl->cmd.buf, 0x83, COMMAND_BUFFER_SIZE8); + iowrite32(lower_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_LO); + iowrite32(upper_32_bits(ctrl->cmd.dma_addr), ctrl->regmap + CB_ADD_HI); + + ctrl->status.idx = ioread32(ctrl->regmap + HW_STATUS_CNT); + if (ctrl->status.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading status cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->status.idx, ctrl->regmap + HW_STATUS_CNT); + iowrite32(ctrl->status.idx, ctrl->regmap + SW_STATUS_CNT); + + ctrl->cmd.idx = ioread32(ctrl->regmap + HW_CMD_IDX); + if (ctrl->cmd.idx > RSXX_MAX_OUTSTANDING_CMDS) { + dev_crit(&dev->dev, "Failed reading cmd cnt x%x\n", + ctrl->status.idx); + return -EINVAL; + } + iowrite32(ctrl->cmd.idx, ctrl->regmap + HW_CMD_IDX); + iowrite32(ctrl->cmd.idx, ctrl->regmap + SW_CMD_IDX); + + wmb(); + + return 0; +} + +static int rsxx_dma_stripe_setup(struct rsxx_cardinfo *card, + unsigned int stripe_size8) +{ + if (!is_power_of_2(stripe_size8)) { + dev_err(CARD_TO_DEV(card), + "stripe_size is NOT a power of 2!\n"); + return -EINVAL; + } + + card->_stripe.lower_mask = stripe_size8 - 1; + + card->_stripe.upper_mask = ~(card->_stripe.lower_mask); + card->_stripe.upper_shift = ffs(card->n_targets) - 1; + + card->_stripe.target_mask = card->n_targets - 1; + card->_stripe.target_shift = ffs(stripe_size8) - 1; + + dev_dbg(CARD_TO_DEV(card), "_stripe.lower_mask = x%016llx\n", + card->_stripe.lower_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.upper_shift = x%016llx\n", + card->_stripe.upper_shift); + dev_dbg(CARD_TO_DEV(card), "_stripe.upper_mask = x%016llx\n", + card->_stripe.upper_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.target_mask = x%016llx\n", + card->_stripe.target_mask); + dev_dbg(CARD_TO_DEV(card), "_stripe.target_shift = x%016llx\n", + card->_stripe.target_shift); + + return 0; +} + +static int rsxx_dma_configure(struct rsxx_cardinfo *card) +{ + u32 intr_coal; + + intr_coal = dma_intr_coal_val(card->config.data.intr_coal.mode, + card->config.data.intr_coal.count, + card->config.data.intr_coal.latency); + iowrite32(intr_coal, card->regmap + INTR_COAL); + + return rsxx_dma_stripe_setup(card, card->config.data.stripe_size); +} + +int rsxx_dma_setup(struct rsxx_cardinfo *card) +{ + unsigned long flags; + int st; + int i; + + dev_info(CARD_TO_DEV(card), + "Initializing %d DMA targets\n", + card->n_targets); + + /* Regmap is divided up into 4K chunks. One for each DMA channel */ + for (i = 0; i < card->n_targets; i++) + card->ctrl[i].regmap = card->regmap + (i * 4096); + + card->dma_fault = 0; + + /* Reset the DMA queues */ + rsxx_dma_queue_reset(card); + + /************* Setup DMA Control *************/ + for (i = 0; i < card->n_targets; i++) { + st = rsxx_dma_ctrl_init(card->dev, &card->ctrl[i]); + if (st) + goto failed_dma_setup; + + card->ctrl[i].card = card; + card->ctrl[i].id = i; + } + + card->scrub_hard = 1; + + if (card->config_valid) + rsxx_dma_configure(card); + + /* Enable the interrupts after all setup has completed. */ + for (i = 0; i < card->n_targets; i++) { + spin_lock_irqsave(&card->irq_lock, flags); + rsxx_enable_ier_and_isr(card, CR_INTR_DMA(i)); + spin_unlock_irqrestore(&card->irq_lock, flags); + } + + return 0; + +failed_dma_setup: + for (i = 0; i < card->n_targets; i++) { + struct rsxx_dma_ctrl *ctrl = &card->ctrl[i]; + + if (ctrl->issue_wq) { + destroy_workqueue(ctrl->issue_wq); + ctrl->issue_wq = NULL; + } + + if (ctrl->done_wq) { + destroy_workqueue(ctrl->done_wq); + ctrl->done_wq = NULL; + } + + if (ctrl->trackers) + vfree(ctrl->trackers); + + if (ctrl->status.buf) + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, + ctrl->status.dma_addr); + if (ctrl->cmd.buf) + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); + } + + return st; +} + + +void rsxx_dma_destroy(struct rsxx_cardinfo *card) +{ + struct rsxx_dma_ctrl *ctrl; + struct rsxx_dma *dma; + int i, j; + int cnt = 0; + + for (i = 0; i < card->n_targets; i++) { + ctrl = &card->ctrl[i]; + + if (ctrl->issue_wq) { + destroy_workqueue(ctrl->issue_wq); + ctrl->issue_wq = NULL; + } + + if (ctrl->done_wq) { + destroy_workqueue(ctrl->done_wq); + ctrl->done_wq = NULL; + } + + if (timer_pending(&ctrl->activity_timer)) + del_timer_sync(&ctrl->activity_timer); + + /* Clean up the DMA queue */ + spin_lock(&ctrl->queue_lock); + cnt = rsxx_cleanup_dma_queue(card, &ctrl->queue); + spin_unlock(&ctrl->queue_lock); + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Freed %d queued DMAs on channel %d\n", + cnt, i); + + /* Clean up issued DMAs */ + for (j = 0; j < RSXX_MAX_OUTSTANDING_CMDS; j++) { + dma = get_tracker_dma(ctrl->trackers, j); + if (dma) { + pci_unmap_page(card->dev, dma->dma_addr, + get_dma_size(dma), + (dma->cmd == HW_CMD_BLK_WRITE) ? + PCI_DMA_TODEVICE : + PCI_DMA_FROMDEVICE); + kmem_cache_free(rsxx_dma_pool, dma); + cnt++; + } + } + + if (cnt) + dev_info(CARD_TO_DEV(card), + "Freed %d pending DMAs on channel %d\n", + cnt, i); + + vfree(ctrl->trackers); + + pci_free_consistent(card->dev, STATUS_BUFFER_SIZE8, + ctrl->status.buf, ctrl->status.dma_addr); + pci_free_consistent(card->dev, COMMAND_BUFFER_SIZE8, + ctrl->cmd.buf, ctrl->cmd.dma_addr); + } +} + + +int rsxx_dma_init(void) +{ + rsxx_dma_pool = KMEM_CACHE(rsxx_dma, SLAB_HWCACHE_ALIGN); + if (!rsxx_dma_pool) + return -ENOMEM; + + return 0; +} + + +void rsxx_dma_cleanup(void) +{ + kmem_cache_destroy(rsxx_dma_pool); +} + diff --git a/drivers/block/rsxx/rsxx.h b/drivers/block/rsxx/rsxx.h new file mode 100644 index 00000000000..2e50b65902b --- /dev/null +++ b/drivers/block/rsxx/rsxx.h @@ -0,0 +1,45 @@ +/* +* Filename: rsxx.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_H__ +#define __RSXX_H__ + +/*----------------- IOCTL Definitions -------------------*/ + +struct rsxx_reg_access { + __u32 addr; + __u32 cnt; + __u32 stat; + __u32 stream; + __u32 data[8]; +}; + +#define RSXX_MAX_REG_CNT (8 * (sizeof(__u32))) + +#define RSXX_IOC_MAGIC 'r' + +#define RSXX_GETREG _IOWR(RSXX_IOC_MAGIC, 0x20, struct rsxx_reg_access) +#define RSXX_SETREG _IOWR(RSXX_IOC_MAGIC, 0x21, struct rsxx_reg_access) + +#endif /* __RSXX_H_ */ diff --git a/drivers/block/rsxx/rsxx_cfg.h b/drivers/block/rsxx/rsxx_cfg.h new file mode 100644 index 00000000000..c025fe5fdb7 --- /dev/null +++ b/drivers/block/rsxx/rsxx_cfg.h @@ -0,0 +1,72 @@ +/* +* Filename: rsXX_cfg.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_CFG_H__ +#define __RSXX_CFG_H__ + +/* NOTE: Config values will be saved in network byte order (i.e. Big endian) */ +#include <linux/types.h> + +/* + * The card config version must match the driver's expected version. If it does + * not, the DMA interfaces will not be attached and the user will need to + * initialize/upgrade the card configuration using the card config utility. + */ +#define RSXX_CFG_VERSION 4 + +struct card_cfg_hdr { + __u32 version; + __u32 crc; +}; + +struct card_cfg_data { + __u32 block_size; + __u32 stripe_size; + __u32 vendor_id; + __u32 cache_order; + struct { + __u32 mode; /* Disabled, manual, auto-tune... */ + __u32 count; /* Number of intr to coalesce */ + __u32 latency;/* Max wait time (in ns) */ + } intr_coal; +}; + +struct rsxx_card_cfg { + struct card_cfg_hdr hdr; + struct card_cfg_data data; +}; + +/* Vendor ID Values */ +#define RSXX_VENDOR_ID_TMS_IBM 0 +#define RSXX_VENDOR_ID_DSI 1 +#define RSXX_VENDOR_COUNT 2 + +/* Interrupt Coalescing Values */ +#define RSXX_INTR_COAL_DISABLED 0 +#define RSXX_INTR_COAL_EXPLICIT 1 +#define RSXX_INTR_COAL_AUTO_TUNE 2 + + +#endif /* __RSXX_CFG_H__ */ + diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h new file mode 100644 index 00000000000..a1ac907d8f4 --- /dev/null +++ b/drivers/block/rsxx/rsxx_priv.h @@ -0,0 +1,399 @@ +/* +* Filename: rsxx_priv.h +* +* +* Authors: Joshua Morris <josh.h.morris@us.ibm.com> +* Philip Kelleher <pjk1939@linux.vnet.ibm.com> +* +* (C) Copyright 2013 IBM Corporation +* +* This program is free software; you can redistribute it and/or +* modify it under the terms of the GNU General Public License as +* published by the Free Software Foundation; either version 2 of the +* License, or (at your option) any later version. +* +* This program is distributed in the hope that it will be useful, but +* WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +* General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; if not, write to the Free Software Foundation, +* Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +#ifndef __RSXX_PRIV_H__ +#define __RSXX_PRIV_H__ + +#include <linux/version.h> +#include <linux/semaphore.h> + +#include <linux/fs.h> +#include <linux/interrupt.h> +#include <linux/mutex.h> +#include <linux/pci.h> +#include <linux/spinlock.h> +#include <linux/sysfs.h> +#include <linux/workqueue.h> +#include <linux/bio.h> +#include <linux/vmalloc.h> +#include <linux/timer.h> +#include <linux/ioctl.h> + +#include "rsxx.h" +#include "rsxx_cfg.h" + +struct proc_cmd; + +#define PCI_VENDOR_ID_TMS_IBM 0x15B6 +#define PCI_DEVICE_ID_RS70_FLASH 0x0019 +#define PCI_DEVICE_ID_RS70D_FLASH 0x001A +#define PCI_DEVICE_ID_RS80_FLASH 0x001C +#define PCI_DEVICE_ID_RS81_FLASH 0x001E + +#define RS70_PCI_REV_SUPPORTED 4 + +#define DRIVER_NAME "rsxx" +#define DRIVER_VERSION "3.7" + +/* Block size is 4096 */ +#define RSXX_HW_BLK_SHIFT 12 +#define RSXX_HW_BLK_SIZE (1 << RSXX_HW_BLK_SHIFT) +#define RSXX_HW_BLK_MASK (RSXX_HW_BLK_SIZE - 1) + +#define MAX_CREG_DATA8 32 +#define LOG_BUF_SIZE8 128 + +#define RSXX_MAX_OUTSTANDING_CMDS 255 +#define RSXX_CS_IDX_MASK 0xff + +#define RSXX_MAX_TARGETS 8 + +struct dma_tracker_list; + +/* DMA Command/Status Buffer structure */ +struct rsxx_cs_buffer { + dma_addr_t dma_addr; + void *buf; + u32 idx; +}; + +struct rsxx_dma_stats { + u32 crc_errors; + u32 hard_errors; + u32 soft_errors; + u32 writes_issued; + u32 writes_failed; + u32 reads_issued; + u32 reads_failed; + u32 reads_retried; + u32 discards_issued; + u32 discards_failed; + u32 done_rescheduled; + u32 issue_rescheduled; + u32 sw_q_depth; /* Number of DMAs on the SW queue. */ + atomic_t hw_q_depth; /* Number of DMAs queued to HW. */ +}; + +struct rsxx_dma_ctrl { + struct rsxx_cardinfo *card; + int id; + void __iomem *regmap; + struct rsxx_cs_buffer status; + struct rsxx_cs_buffer cmd; + u16 e_cnt; + spinlock_t queue_lock; + struct list_head queue; + struct workqueue_struct *issue_wq; + struct work_struct issue_dma_work; + struct workqueue_struct *done_wq; + struct work_struct dma_done_work; + struct timer_list activity_timer; + struct dma_tracker_list *trackers; + struct rsxx_dma_stats stats; +}; + +struct rsxx_cardinfo { + struct pci_dev *dev; + unsigned int halt; + + void __iomem *regmap; + spinlock_t irq_lock; + unsigned int isr_mask; + unsigned int ier_mask; + + struct rsxx_card_cfg config; + int config_valid; + + /* Embedded CPU Communication */ + struct { + spinlock_t lock; + bool active; + struct creg_cmd *active_cmd; + struct work_struct done_work; + struct list_head queue; + unsigned int q_depth; + /* Cache the creg status to prevent ioreads */ + struct { + u32 stat; + u32 failed_cancel_timer; + u32 creg_timeout; + } creg_stats; + struct timer_list cmd_timer; + struct mutex reset_lock; + int reset; + } creg_ctrl; + + struct { + char tmp[MAX_CREG_DATA8]; + char buf[LOG_BUF_SIZE8]; /* terminated */ + int buf_len; + } log; + + struct work_struct event_work; + unsigned int state; + u64 size8; + + /* Lock the device attach/detach function */ + struct mutex dev_lock; + + /* Block Device Variables */ + bool bdev_attached; + int disk_id; + int major; + struct request_queue *queue; + struct gendisk *gendisk; + struct { + /* Used to convert a byte address to a device address. */ + u64 lower_mask; + u64 upper_shift; + u64 upper_mask; + u64 target_mask; + u64 target_shift; + } _stripe; + unsigned int dma_fault; + + int scrub_hard; + + int n_targets; + struct rsxx_dma_ctrl *ctrl; +}; + +enum rsxx_pci_regmap { + HWID = 0x00, /* Hardware Identification Register */ + SCRATCH = 0x04, /* Scratch/Debug Register */ + RESET = 0x08, /* Reset Register */ + ISR = 0x10, /* Interrupt Status Register */ + IER = 0x14, /* Interrupt Enable Register */ + IPR = 0x18, /* Interrupt Poll Register */ + CB_ADD_LO = 0x20, /* Command Host Buffer Address [31:0] */ + CB_ADD_HI = 0x24, /* Command Host Buffer Address [63:32]*/ + HW_CMD_IDX = 0x28, /* Hardware Processed Command Index */ + SW_CMD_IDX = 0x2C, /* Software Processed Command Index */ + SB_ADD_LO = 0x30, /* Status Host Buffer Address [31:0] */ + SB_ADD_HI = 0x34, /* Status Host Buffer Address [63:32] */ + HW_STATUS_CNT = 0x38, /* Hardware Status Counter */ + SW_STATUS_CNT = 0x3C, /* Deprecated */ + CREG_CMD = 0x40, /* CPU Command Register */ + CREG_ADD = 0x44, /* CPU Address Register */ + CREG_CNT = 0x48, /* CPU Count Register */ + CREG_STAT = 0x4C, /* CPU Status Register */ + CREG_DATA0 = 0x50, /* CPU Data Registers */ + CREG_DATA1 = 0x54, + CREG_DATA2 = 0x58, + CREG_DATA3 = 0x5C, + CREG_DATA4 = 0x60, + CREG_DATA5 = 0x64, + CREG_DATA6 = 0x68, + CREG_DATA7 = 0x6c, + INTR_COAL = 0x70, /* Interrupt Coalescing Register */ + HW_ERROR = 0x74, /* Card Error Register */ + PCI_DEBUG0 = 0x78, /* PCI Debug Registers */ + PCI_DEBUG1 = 0x7C, + PCI_DEBUG2 = 0x80, + PCI_DEBUG3 = 0x84, + PCI_DEBUG4 = 0x88, + PCI_DEBUG5 = 0x8C, + PCI_DEBUG6 = 0x90, + PCI_DEBUG7 = 0x94, + PCI_POWER_THROTTLE = 0x98, + PERF_CTRL = 0x9c, + PERF_TIMER_LO = 0xa0, + PERF_TIMER_HI = 0xa4, + PERF_RD512_LO = 0xa8, + PERF_RD512_HI = 0xac, + PERF_WR512_LO = 0xb0, + PERF_WR512_HI = 0xb4, +}; + +enum rsxx_intr { + CR_INTR_DMA0 = 0x00000001, + CR_INTR_CREG = 0x00000002, + CR_INTR_DMA1 = 0x00000004, + CR_INTR_EVENT = 0x00000008, + CR_INTR_DMA2 = 0x00000010, + CR_INTR_DMA3 = 0x00000020, + CR_INTR_DMA4 = 0x00000040, + CR_INTR_DMA5 = 0x00000080, + CR_INTR_DMA6 = 0x00000100, + CR_INTR_DMA7 = 0x00000200, + CR_INTR_DMA_ALL = 0x000003f5, + CR_INTR_ALL = 0xffffffff, +}; + +static inline int CR_INTR_DMA(int N) +{ + static const unsigned int _CR_INTR_DMA[] = { + CR_INTR_DMA0, CR_INTR_DMA1, CR_INTR_DMA2, CR_INTR_DMA3, + CR_INTR_DMA4, CR_INTR_DMA5, CR_INTR_DMA6, CR_INTR_DMA7 + }; + return _CR_INTR_DMA[N]; +} +enum rsxx_pci_reset { + DMA_QUEUE_RESET = 0x00000001, +}; + +enum rsxx_pci_revision { + RSXX_DISCARD_SUPPORT = 2, +}; + +enum rsxx_creg_cmd { + CREG_CMD_TAG_MASK = 0x0000FF00, + CREG_OP_WRITE = 0x000000C0, + CREG_OP_READ = 0x000000E0, +}; + +enum rsxx_creg_addr { + CREG_ADD_CARD_CMD = 0x80001000, + CREG_ADD_CARD_STATE = 0x80001004, + CREG_ADD_CARD_SIZE = 0x8000100c, + CREG_ADD_CAPABILITIES = 0x80001050, + CREG_ADD_LOG = 0x80002000, + CREG_ADD_NUM_TARGETS = 0x80003000, + CREG_ADD_CONFIG = 0xB0000000, +}; + +enum rsxx_creg_card_cmd { + CARD_CMD_STARTUP = 1, + CARD_CMD_SHUTDOWN = 2, + CARD_CMD_LOW_LEVEL_FORMAT = 3, + CARD_CMD_FPGA_RECONFIG_BR = 4, + CARD_CMD_FPGA_RECONFIG_MAIN = 5, + CARD_CMD_BACKUP = 6, + CARD_CMD_RESET = 7, + CARD_CMD_deprecated = 8, + CARD_CMD_UNINITIALIZE = 9, + CARD_CMD_DSTROY_EMERGENCY = 10, + CARD_CMD_DSTROY_NORMAL = 11, + CARD_CMD_DSTROY_EXTENDED = 12, + CARD_CMD_DSTROY_ABORT = 13, +}; + +enum rsxx_card_state { + CARD_STATE_SHUTDOWN = 0x00000001, + CARD_STATE_STARTING = 0x00000002, + CARD_STATE_FORMATTING = 0x00000004, + CARD_STATE_UNINITIALIZED = 0x00000008, + CARD_STATE_GOOD = 0x00000010, + CARD_STATE_SHUTTING_DOWN = 0x00000020, + CARD_STATE_FAULT = 0x00000040, + CARD_STATE_RD_ONLY_FAULT = 0x00000080, + CARD_STATE_DSTROYING = 0x00000100, +}; + +enum rsxx_led { + LED_DEFAULT = 0x0, + LED_IDENTIFY = 0x1, + LED_SOAK = 0x2, +}; + +enum rsxx_creg_flash_lock { + CREG_FLASH_LOCK = 1, + CREG_FLASH_UNLOCK = 2, +}; + +enum rsxx_card_capabilities { + CARD_CAP_SUBPAGE_WRITES = 0x00000080, +}; + +enum rsxx_creg_stat { + CREG_STAT_STATUS_MASK = 0x00000003, + CREG_STAT_SUCCESS = 0x1, + CREG_STAT_ERROR = 0x2, + CREG_STAT_CHAR_PENDING = 0x00000004, /* Character I/O pending bit */ + CREG_STAT_LOG_PENDING = 0x00000008, /* HW log message pending bit */ + CREG_STAT_TAG_MASK = 0x0000ff00, +}; + +static inline unsigned int CREG_DATA(int N) +{ + return CREG_DATA0 + (N << 2); +} + +/*----------------- Convenient Log Wrappers -------------------*/ +#define CARD_TO_DEV(__CARD) (&(__CARD)->dev->dev) + +/***** config.c *****/ +int rsxx_load_config(struct rsxx_cardinfo *card); + +/***** core.c *****/ +void rsxx_enable_ier(struct rsxx_cardinfo *card, unsigned int intr); +void rsxx_disable_ier(struct rsxx_cardinfo *card, unsigned int intr); +void rsxx_enable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr); +void rsxx_disable_ier_and_isr(struct rsxx_cardinfo *card, + unsigned int intr); + +/***** dev.c *****/ +int rsxx_attach_dev(struct rsxx_cardinfo *card); +void rsxx_detach_dev(struct rsxx_cardinfo *card); +int rsxx_setup_dev(struct rsxx_cardinfo *card); +void rsxx_destroy_dev(struct rsxx_cardinfo *card); +int rsxx_dev_init(void); +void rsxx_dev_cleanup(void); + +/***** dma.c ****/ +typedef void (*rsxx_dma_cb)(struct rsxx_cardinfo *card, + void *cb_data, + unsigned int status); +int rsxx_dma_setup(struct rsxx_cardinfo *card); +void rsxx_dma_destroy(struct rsxx_cardinfo *card); +int rsxx_dma_init(void); +void rsxx_dma_cleanup(void); +int rsxx_dma_queue_bio(struct rsxx_cardinfo *card, + struct bio *bio, + atomic_t *n_dmas, + rsxx_dma_cb cb, + void *cb_data); + +/***** cregs.c *****/ +int rsxx_creg_write(struct rsxx_cardinfo *card, u32 addr, + unsigned int size8, + void *data, + int byte_stream); +int rsxx_creg_read(struct rsxx_cardinfo *card, + u32 addr, + unsigned int size8, + void *data, + int byte_stream); +int rsxx_read_hw_log(struct rsxx_cardinfo *card); +int rsxx_get_card_state(struct rsxx_cardinfo *card, + unsigned int *state); +int rsxx_get_card_size8(struct rsxx_cardinfo *card, u64 *size8); +int rsxx_get_num_targets(struct rsxx_cardinfo *card, + unsigned int *n_targets); +int rsxx_get_card_capabilities(struct rsxx_cardinfo *card, + u32 *capabilities); +int rsxx_issue_card_cmd(struct rsxx_cardinfo *card, u32 cmd); +int rsxx_creg_setup(struct rsxx_cardinfo *card); +void rsxx_creg_destroy(struct rsxx_cardinfo *card); +int rsxx_creg_init(void); +void rsxx_creg_cleanup(void); + +int rsxx_reg_access(struct rsxx_cardinfo *card, + struct rsxx_reg_access __user *ucmd, + int read); + + + +#endif /* __DRIVERS_BLOCK_RSXX_H__ */ diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 57763c54363..758f2ac878c 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1090,10 +1090,13 @@ static const struct block_device_operations floppy_fops = { static void swim3_mb_event(struct macio_dev* mdev, int mb_state) { struct floppy_state *fs = macio_get_drvdata(mdev); - struct swim3 __iomem *sw = fs->swim3; + struct swim3 __iomem *sw; if (!fs) return; + + sw = fs->swim3; + if (mb_state != MB_FD) return; diff --git a/drivers/block/xd.c b/drivers/block/xd.c deleted file mode 100644 index ff540520bad..00000000000 --- a/drivers/block/xd.c +++ /dev/null @@ -1,1123 +0,0 @@ -/* - * This file contains the driver for an XT hard disk controller - * (at least the DTC 5150X) for Linux. - * - * Author: Pat Mackinlay, pat@it.com.au - * Date: 29/09/92 - * - * Revised: 01/01/93, ... - * - * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, - * kevinf@agora.rain.com) - * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and - * Wim Van Dorst. - * - * Revised: 04/04/94 by Risto Kankkunen - * Moved the detection code from xd_init() to xd_geninit() as it needed - * interrupts enabled and Linus didn't want to enable them in that first - * phase. xd_geninit() is the place to do these kinds of things anyway, - * he says. - * - * Modularized: 04/10/96 by Todd Fries, tfries@umr.edu - * - * Revised: 13/12/97 by Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl - * Fixed some problems with disk initialization and module initiation. - * Added support for manual geometry setting (except Seagate controllers) - * in form: - * xd_geo=<cyl_xda>,<head_xda>,<sec_xda>[,<cyl_xdb>,<head_xdb>,<sec_xdb>] - * Recovered DMA access. Abridged messages. Added support for DTC5051CX, - * WD1002-27X & XEBEC controllers. Driver uses now some jumper settings. - * Extended ioctl() support. - * - * Bugfix: 15/02/01, Paul G. - inform queue layer of tiny xd_maxsect. - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/timer.h> -#include <linux/genhd.h> -#include <linux/hdreg.h> -#include <linux/ioport.h> -#include <linux/init.h> -#include <linux/wait.h> -#include <linux/blkdev.h> -#include <linux/mutex.h> -#include <linux/blkpg.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/gfp.h> - -#include <asm/uaccess.h> -#include <asm/dma.h> - -#include "xd.h" - -static DEFINE_MUTEX(xd_mutex); -static void __init do_xd_setup (int *integers); -#ifdef MODULE -static int xd[5] = { -1,-1,-1,-1, }; -#endif - -#define XD_DONT_USE_DMA 0 /* Initial value. may be overriden using - "nodma" module option */ -#define XD_INIT_DISK_DELAY (30) /* 30 ms delay during disk initialization */ - -/* Above may need to be increased if a problem with the 2nd drive detection - (ST11M controller) or resetting a controller (WD) appears */ - -static XD_INFO xd_info[XD_MAXDRIVES]; - -/* If you try this driver and find that your card is not detected by the driver at bootup, you need to add your BIOS - signature and details to the following list of signatures. A BIOS signature is a string embedded into the first - few bytes of your controller's on-board ROM BIOS. To find out what yours is, use something like MS-DOS's DEBUG - command. Run DEBUG, and then you can examine your BIOS signature with: - - d xxxx:0000 - - where xxxx is the segment of your controller (like C800 or D000 or something). On the ASCII dump at the right, you should - be able to see a string mentioning the manufacturer's copyright etc. Add this string into the table below. The parameters - in the table are, in order: - - offset ; this is the offset (in bytes) from the start of your ROM where the signature starts - signature ; this is the actual text of the signature - xd_?_init_controller ; this is the controller init routine used by your controller - xd_?_init_drive ; this is the drive init routine used by your controller - - The controllers directly supported at the moment are: DTC 5150x, WD 1004A27X, ST11M/R and override. If your controller is - made by the same manufacturer as one of these, try using the same init routines as they do. If that doesn't work, your - best bet is to use the "override" routines. These routines use a "portable" method of getting the disk's geometry, and - may work with your card. If none of these seem to work, try sending me some email and I'll see what I can do <grin>. - - NOTE: You can now specify your XT controller's parameters from the command line in the form xd=TYPE,IRQ,IO,DMA. The driver - should be able to detect your drive's geometry from this info. (eg: xd=0,5,0x320,3 is the "standard"). */ - -#include <asm/page.h> -#define xd_dma_mem_alloc(size) __get_dma_pages(GFP_KERNEL,get_order(size)) -#define xd_dma_mem_free(addr, size) free_pages(addr, get_order(size)) -static char *xd_dma_buffer; - -static XD_SIGNATURE xd_sigs[] __initdata = { - { 0x0000,"Override geometry handler",NULL,xd_override_init_drive,"n unknown" }, /* Pat Mackinlay, pat@it.com.au */ - { 0x0008,"[BXD06 (C) DTC 17-MAY-1985]",xd_dtc_init_controller,xd_dtc5150cx_init_drive," DTC 5150CX" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x000B,"CRD18A Not an IBM rom. (C) Copyright Data Technology Corp. 05/31/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Todd Fries, tfries@umr.edu */ - { 0x000B,"CXD23A Not an IBM ROM (C)Copyright Data Technology Corp 12/03/88",xd_dtc_init_controller,xd_dtc_init_drive," DTC 5150X" }, /* Pat Mackinlay, pat@it.com.au */ - { 0x0008,"07/15/86(C) Copyright 1986 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. 1002-27X" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x0008,"06/24/88(C) Copyright 1988 Western Digital Corp.",xd_wd_init_controller,xd_wd_init_drive," Western Dig. WDXT-GEN2" }, /* Dan Newcombe, newcombe@aa.csc.peachnet.edu */ - { 0x0015,"SEAGATE ST11 BIOS REVISION",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Salvador Abreu, spa@fct.unl.pt */ - { 0x0010,"ST11R BIOS",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11M/R" }, /* Risto Kankkunen, risto.kankkunen@cs.helsinki.fi */ - { 0x0010,"ST11 BIOS v1.7",xd_seagate_init_controller,xd_seagate_init_drive," Seagate ST11R" }, /* Alan Hourihane, alanh@fairlite.demon.co.uk */ - { 0x1000,"(c)Copyright 1987 SMS",xd_omti_init_controller,xd_omti_init_drive,"n OMTI 5520" }, /* Dirk Melchers, dirk@merlin.nbg.sub.org */ - { 0x0006,"COPYRIGHT XEBEC (C) 1984",xd_xebec_init_controller,xd_xebec_init_drive," XEBEC" }, /* Andrzej Krzysztofowicz, ankry@mif.pg.gda.pl */ - { 0x0008,"(C) Copyright 1984 Western Digital Corp", xd_wd_init_controller, xd_wd_init_drive," Western Dig. 1002s-wx2" }, - { 0x0008,"(C) Copyright 1986 Western Digital Corporation", xd_wd_init_controller, xd_wd_init_drive," 1986 Western Digital" }, /* jfree@sovereign.org */ -}; - -static unsigned int xd_bases[] __initdata = -{ - 0xC8000, 0xCA000, 0xCC000, - 0xCE000, 0xD0000, 0xD2000, - 0xD4000, 0xD6000, 0xD8000, - 0xDA000, 0xDC000, 0xDE000, - 0xE0000 -}; - -static DEFINE_SPINLOCK(xd_lock); - -static struct gendisk *xd_gendisk[2]; - -static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo); - -static const struct block_device_operations xd_fops = { - .owner = THIS_MODULE, - .ioctl = xd_ioctl, - .getgeo = xd_getgeo, -}; -static DECLARE_WAIT_QUEUE_HEAD(xd_wait_int); -static u_char xd_drives, xd_irq = 5, xd_dma = 3, xd_maxsectors; -static u_char xd_override __initdata = 0, xd_type __initdata = 0; -static u_short xd_iobase = 0x320; -static int xd_geo[XD_MAXDRIVES*3] __initdata = { 0, }; - -static volatile int xdc_busy; -static struct timer_list xd_watchdog_int; - -static volatile u_char xd_error; -static bool nodma = XD_DONT_USE_DMA; - -static struct request_queue *xd_queue; - -/* xd_init: register the block device number and set up pointer tables */ -static int __init xd_init(void) -{ - u_char i,controller; - unsigned int address; - int err; - -#ifdef MODULE - { - u_char count = 0; - for (i = 4; i > 0; i--) - if (((xd[i] = xd[i-1]) >= 0) && !count) - count = i; - if ((xd[0] = count)) - do_xd_setup(xd); - } -#endif - - init_timer (&xd_watchdog_int); xd_watchdog_int.function = xd_watchdog; - - err = -EBUSY; - if (register_blkdev(XT_DISK_MAJOR, "xd")) - goto out1; - - err = -ENOMEM; - xd_queue = blk_init_queue(do_xd_request, &xd_lock); - if (!xd_queue) - goto out1a; - - if (xd_detect(&controller,&address)) { - - printk("Detected a%s controller (type %d) at address %06x\n", - xd_sigs[controller].name,controller,address); - if (!request_region(xd_iobase,4,"xd")) { - printk("xd: Ports at 0x%x are not available\n", - xd_iobase); - goto out2; - } - if (controller) - xd_sigs[controller].init_controller(address); - xd_drives = xd_initdrives(xd_sigs[controller].init_drive); - - printk("Detected %d hard drive%s (using IRQ%d & DMA%d)\n", - xd_drives,xd_drives == 1 ? "" : "s",xd_irq,xd_dma); - } - - /* - * With the drive detected, xd_maxsectors should now be known. - * If xd_maxsectors is 0, nothing was detected and we fall through - * to return -ENODEV - */ - if (!xd_dma_buffer && xd_maxsectors) { - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - if (!xd_dma_buffer) { - printk(KERN_ERR "xd: Out of memory.\n"); - goto out3; - } - } - - err = -ENODEV; - if (!xd_drives) - goto out3; - - for (i = 0; i < xd_drives; i++) { - XD_INFO *p = &xd_info[i]; - struct gendisk *disk = alloc_disk(64); - if (!disk) - goto Enomem; - p->unit = i; - disk->major = XT_DISK_MAJOR; - disk->first_minor = i<<6; - sprintf(disk->disk_name, "xd%c", i+'a'); - disk->fops = &xd_fops; - disk->private_data = p; - disk->queue = xd_queue; - set_capacity(disk, p->heads * p->cylinders * p->sectors); - printk(" %s: CHS=%d/%d/%d\n", disk->disk_name, - p->cylinders, p->heads, p->sectors); - xd_gendisk[i] = disk; - } - - err = -EBUSY; - if (request_irq(xd_irq,xd_interrupt_handler, 0, "XT hard disk", NULL)) { - printk("xd: unable to get IRQ%d\n",xd_irq); - goto out4; - } - - if (request_dma(xd_dma,"xd")) { - printk("xd: unable to get DMA%d\n",xd_dma); - goto out5; - } - - /* xd_maxsectors depends on controller - so set after detection */ - blk_queue_max_hw_sectors(xd_queue, xd_maxsectors); - - for (i = 0; i < xd_drives; i++) - add_disk(xd_gendisk[i]); - - return 0; - -out5: - free_irq(xd_irq, NULL); -out4: - for (i = 0; i < xd_drives; i++) - put_disk(xd_gendisk[i]); -out3: - if (xd_maxsectors) - release_region(xd_iobase,4); - - if (xd_dma_buffer) - xd_dma_mem_free((unsigned long)xd_dma_buffer, - xd_maxsectors * 0x200); -out2: - blk_cleanup_queue(xd_queue); -out1a: - unregister_blkdev(XT_DISK_MAJOR, "xd"); -out1: - return err; -Enomem: - err = -ENOMEM; - while (i--) - put_disk(xd_gendisk[i]); - goto out3; -} - -/* xd_detect: scan the possible BIOS ROM locations for the signature strings */ -static u_char __init xd_detect (u_char *controller, unsigned int *address) -{ - int i, j; - - if (xd_override) - { - *controller = xd_type; - *address = 0; - return(1); - } - - for (i = 0; i < ARRAY_SIZE(xd_bases); i++) { - void __iomem *p = ioremap(xd_bases[i], 0x2000); - if (!p) - continue; - for (j = 1; j < ARRAY_SIZE(xd_sigs); j++) { - const char *s = xd_sigs[j].string; - if (check_signature(p + xd_sigs[j].offset, s, strlen(s))) { - *controller = j; - xd_type = j; - *address = xd_bases[i]; - iounmap(p); - return 1; - } - } - iounmap(p); - } - return 0; -} - -/* do_xd_request: handle an incoming request */ -static void do_xd_request (struct request_queue * q) -{ - struct request *req; - - if (xdc_busy) - return; - - req = blk_fetch_request(q); - while (req) { - unsigned block = blk_rq_pos(req); - unsigned count = blk_rq_cur_sectors(req); - XD_INFO *disk = req->rq_disk->private_data; - int res = -EIO; - int retry; - - if (req->cmd_type != REQ_TYPE_FS) - goto done; - if (block + count > get_capacity(req->rq_disk)) - goto done; - for (retry = 0; (retry < XD_RETRIES) && !res; retry++) - res = xd_readwrite(rq_data_dir(req), disk, req->buffer, - block, count); - done: - /* wrap up, 0 = success, -errno = fail */ - if (!__blk_end_request_cur(req, res)) - req = blk_fetch_request(q); - } -} - -static int xd_getgeo(struct block_device *bdev, struct hd_geometry *geo) -{ - XD_INFO *p = bdev->bd_disk->private_data; - - geo->heads = p->heads; - geo->sectors = p->sectors; - geo->cylinders = p->cylinders; - return 0; -} - -/* xd_ioctl: handle device ioctl's */ -static int xd_locked_ioctl(struct block_device *bdev, fmode_t mode, u_int cmd, u_long arg) -{ - switch (cmd) { - case HDIO_SET_DMA: - if (!capable(CAP_SYS_ADMIN)) return -EACCES; - if (xdc_busy) return -EBUSY; - nodma = !arg; - if (nodma && xd_dma_buffer) { - xd_dma_mem_free((unsigned long)xd_dma_buffer, - xd_maxsectors * 0x200); - xd_dma_buffer = NULL; - } else if (!nodma && !xd_dma_buffer) { - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - if (!xd_dma_buffer) { - nodma = XD_DONT_USE_DMA; - return -ENOMEM; - } - } - return 0; - case HDIO_GET_DMA: - return put_user(!nodma, (long __user *) arg); - case HDIO_GET_MULTCOUNT: - return put_user(xd_maxsectors, (long __user *) arg); - default: - return -EINVAL; - } -} - -static int xd_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long param) -{ - int ret; - - mutex_lock(&xd_mutex); - ret = xd_locked_ioctl(bdev, mode, cmd, param); - mutex_unlock(&xd_mutex); - - return ret; -} - -/* xd_readwrite: handle a read/write request */ -static int xd_readwrite (u_char operation,XD_INFO *p,char *buffer,u_int block,u_int count) -{ - int drive = p->unit; - u_char cmdblk[6],sense[4]; - u_short track,cylinder; - u_char head,sector,control,mode = PIO_MODE,temp; - char **real_buffer; - register int i; - -#ifdef DEBUG_READWRITE - printk("xd_readwrite: operation = %s, drive = %d, buffer = 0x%X, block = %d, count = %d\n",operation == READ ? "read" : "write",drive,buffer,block,count); -#endif /* DEBUG_READWRITE */ - - spin_unlock_irq(&xd_lock); - - control = p->control; - if (!xd_dma_buffer) - xd_dma_buffer = (char *)xd_dma_mem_alloc(xd_maxsectors * 0x200); - while (count) { - temp = count < xd_maxsectors ? count : xd_maxsectors; - - track = block / p->sectors; - head = track % p->heads; - cylinder = track / p->heads; - sector = block % p->sectors; - -#ifdef DEBUG_READWRITE - printk("xd_readwrite: drive = %d, head = %d, cylinder = %d, sector = %d, count = %d\n",drive,head,cylinder,sector,temp); -#endif /* DEBUG_READWRITE */ - - if (xd_dma_buffer) { - mode = xd_setup_dma(operation == READ ? DMA_MODE_READ : DMA_MODE_WRITE,(u_char *)(xd_dma_buffer),temp * 0x200); - real_buffer = &xd_dma_buffer; - for (i=0; i < (temp * 0x200); i++) - xd_dma_buffer[i] = buffer[i]; - } - else - real_buffer = &buffer; - - xd_build(cmdblk,operation == READ ? CMD_READ : CMD_WRITE,drive,head,cylinder,sector,temp & 0xFF,control); - - switch (xd_command(cmdblk,mode,(u_char *)(*real_buffer),(u_char *)(*real_buffer),sense,XD_TIMEOUT)) { - case 1: - printk("xd%c: %s timeout, recalibrating drive\n",'a'+drive,(operation == READ ? "read" : "write")); - xd_recalibrate(drive); - spin_lock_irq(&xd_lock); - return -EIO; - case 2: - if (sense[0] & 0x30) { - printk("xd%c: %s - ",'a'+drive,(operation == READ ? "reading" : "writing")); - switch ((sense[0] & 0x30) >> 4) { - case 0: printk("drive error, code = 0x%X",sense[0] & 0x0F); - break; - case 1: printk("controller error, code = 0x%X",sense[0] & 0x0F); - break; - case 2: printk("command error, code = 0x%X",sense[0] & 0x0F); - break; - case 3: printk("miscellaneous error, code = 0x%X",sense[0] & 0x0F); - break; - } - } - if (sense[0] & 0x80) - printk(" - CHS = %d/%d/%d\n",((sense[2] & 0xC0) << 2) | sense[3],sense[1] & 0x1F,sense[2] & 0x3F); - /* reported drive number = (sense[1] & 0xE0) >> 5 */ - else - printk(" - no valid disk address\n"); - spin_lock_irq(&xd_lock); - return -EIO; - } - if (xd_dma_buffer) - for (i=0; i < (temp * 0x200); i++) - buffer[i] = xd_dma_buffer[i]; - - count -= temp, buffer += temp * 0x200, block += temp; - } - spin_lock_irq(&xd_lock); - return 0; -} - -/* xd_recalibrate: recalibrate a given drive and reset controller if necessary */ -static void xd_recalibrate (u_char drive) -{ - u_char cmdblk[6]; - - xd_build(cmdblk,CMD_RECALIBRATE,drive,0,0,0,0,0); - if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 8)) - printk("xd%c: warning! error recalibrating, controller may be unstable\n", 'a'+drive); -} - -/* xd_interrupt_handler: interrupt service routine */ -static irqreturn_t xd_interrupt_handler(int irq, void *dev_id) -{ - if (inb(XD_STATUS) & STAT_INTERRUPT) { /* check if it was our device */ -#ifdef DEBUG_OTHER - printk("xd_interrupt_handler: interrupt detected\n"); -#endif /* DEBUG_OTHER */ - outb(0,XD_CONTROL); /* acknowledge interrupt */ - wake_up(&xd_wait_int); /* and wake up sleeping processes */ - return IRQ_HANDLED; - } - else - printk("xd: unexpected interrupt\n"); - return IRQ_NONE; -} - -/* xd_setup_dma: set up the DMA controller for a data transfer */ -static u_char xd_setup_dma (u_char mode,u_char *buffer,u_int count) -{ - unsigned long f; - - if (nodma) - return (PIO_MODE); - if (((unsigned long) buffer & 0xFFFF0000) != (((unsigned long) buffer + count) & 0xFFFF0000)) { -#ifdef DEBUG_OTHER - printk("xd_setup_dma: using PIO, transfer overlaps 64k boundary\n"); -#endif /* DEBUG_OTHER */ - return (PIO_MODE); - } - - f=claim_dma_lock(); - disable_dma(xd_dma); - clear_dma_ff(xd_dma); - set_dma_mode(xd_dma,mode); - set_dma_addr(xd_dma, (unsigned long) buffer); - set_dma_count(xd_dma,count); - - release_dma_lock(f); - - return (DMA_MODE); /* use DMA and INT */ -} - -/* xd_build: put stuff into an array in a format suitable for the controller */ -static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control) -{ - cmdblk[0] = command; - cmdblk[1] = ((drive & 0x07) << 5) | (head & 0x1F); - cmdblk[2] = ((cylinder & 0x300) >> 2) | (sector & 0x3F); - cmdblk[3] = cylinder & 0xFF; - cmdblk[4] = count; - cmdblk[5] = control; - - return (cmdblk); -} - -static void xd_watchdog (unsigned long unused) -{ - xd_error = 1; - wake_up(&xd_wait_int); -} - -/* xd_waitport: waits until port & mask == flags or a timeout occurs. return 1 for a timeout */ -static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout) -{ - u_long expiry = jiffies + timeout; - int success; - - xdc_busy = 1; - while ((success = ((inb(port) & mask) != flags)) && time_before(jiffies, expiry)) - schedule_timeout_uninterruptible(1); - xdc_busy = 0; - return (success); -} - -static inline u_int xd_wait_for_IRQ (void) -{ - unsigned long flags; - xd_watchdog_int.expires = jiffies + 8 * HZ; - add_timer(&xd_watchdog_int); - - flags=claim_dma_lock(); - enable_dma(xd_dma); - release_dma_lock(flags); - - sleep_on(&xd_wait_int); - del_timer(&xd_watchdog_int); - xdc_busy = 0; - - flags=claim_dma_lock(); - disable_dma(xd_dma); - release_dma_lock(flags); - - if (xd_error) { - printk("xd: missed IRQ - command aborted\n"); - xd_error = 0; - return (1); - } - return (0); -} - -/* xd_command: handle all data transfers necessary for a single command */ -static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout) -{ - u_char cmdblk[6],csb,complete = 0; - -#ifdef DEBUG_COMMAND - printk("xd_command: command = 0x%X, mode = 0x%X, indata = 0x%X, outdata = 0x%X, sense = 0x%X\n",command,mode,indata,outdata,sense); -#endif /* DEBUG_COMMAND */ - - outb(0,XD_SELECT); - outb(mode,XD_CONTROL); - - if (xd_waitport(XD_STATUS,STAT_SELECT,STAT_SELECT,timeout)) - return (1); - - while (!complete) { - if (xd_waitport(XD_STATUS,STAT_READY,STAT_READY,timeout)) - return (1); - - switch (inb(XD_STATUS) & (STAT_COMMAND | STAT_INPUT)) { - case 0: - if (mode == DMA_MODE) { - if (xd_wait_for_IRQ()) - return (1); - } else - outb(outdata ? *outdata++ : 0,XD_DATA); - break; - case STAT_INPUT: - if (mode == DMA_MODE) { - if (xd_wait_for_IRQ()) - return (1); - } else - if (indata) - *indata++ = inb(XD_DATA); - else - inb(XD_DATA); - break; - case STAT_COMMAND: - outb(command ? *command++ : 0,XD_DATA); - break; - case STAT_COMMAND | STAT_INPUT: - complete = 1; - break; - } - } - csb = inb(XD_DATA); - - if (xd_waitport(XD_STATUS,0,STAT_SELECT,timeout)) /* wait until deselected */ - return (1); - - if (csb & CSB_ERROR) { /* read sense data if error */ - xd_build(cmdblk,CMD_SENSE,(csb & CSB_LUN) >> 5,0,0,0,0,0); - if (xd_command(cmdblk,0,sense,NULL,NULL,XD_TIMEOUT)) - printk("xd: warning! sense command failed!\n"); - } - -#ifdef DEBUG_COMMAND - printk("xd_command: completed with csb = 0x%X\n",csb); -#endif /* DEBUG_COMMAND */ - - return (csb & CSB_ERROR); -} - -static u_char __init xd_initdrives (void (*init_drive)(u_char drive)) -{ - u_char cmdblk[6],i,count = 0; - - for (i = 0; i < XD_MAXDRIVES; i++) { - xd_build(cmdblk,CMD_TESTREADY,i,0,0,0,0,0); - if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT*8)) { - msleep_interruptible(XD_INIT_DISK_DELAY); - - init_drive(count); - count++; - - msleep_interruptible(XD_INIT_DISK_DELAY); - } - } - return (count); -} - -static void __init xd_manual_geo_set (u_char drive) -{ - xd_info[drive].heads = (u_char)(xd_geo[3 * drive + 1]); - xd_info[drive].cylinders = (u_short)(xd_geo[3 * drive]); - xd_info[drive].sectors = (u_char)(xd_geo[3 * drive + 2]); -} - -static void __init xd_dtc_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xCA000: xd_iobase = 0x324; - case 0xD0000: /*5150CX*/ - case 0xD8000: break; /*5150CX & 5150XL*/ - default: printk("xd_dtc_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x01; /* my card seems to have trouble doing multi-block transfers? */ - - outb(0,XD_RESET); /* reset the controller */ -} - - -static void __init xd_dtc5150cx_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS chip may be removed */ - static u_short geometry_table[][4] = { - {0x200,8,0x200,0x100}, - {0x267,2,0x267,0x267}, - {0x264,4,0x264,0x80}, - {0x132,4,0x132,0x0}, - {0x132,2,0x80, 0x132}, - {0x177,8,0x177,0x0}, - {0x132,8,0x84, 0x0}, - {}, /* not used */ - {0x132,6,0x80, 0x100}, - {0x200,6,0x100,0x100}, - {0x264,2,0x264,0x80}, - {0x280,4,0x280,0x100}, - {0x2B9,3,0x2B9,0x2B9}, - {0x2B9,5,0x2B9,0x2B9}, - {0x280,6,0x280,0x100}, - {0x132,4,0x132,0x0}}; - u_char n; - - n = inb(XD_JUMPER); - n = (drive ? n : (n >> 2)) & 0x33; - n = (n | (n >> 2)) & 0x0F; - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else - if (n != 7) { - xd_info[drive].heads = (u_char)(geometry_table[n][1]); /* heads */ - xd_info[drive].cylinders = geometry_table[n][0]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; /* reduced write */ - xd_info[drive].precomp = geometry_table[n][3] /* write precomp */ - xd_info[drive].ecc = 0x0B; /* ecc length */ -#endif /* 0 */ - } - else { - printk("xd%c: undetermined drive geometry\n",'a'+drive); - return; - } - xd_info[drive].control = 5; /* control byte */ - xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B); - xd_recalibrate(drive); -} - -static void __init xd_dtc_init_drive (u_char drive) -{ - u_char cmdblk[6],buf[64]; - - xd_build(cmdblk,CMD_DTCGETGEOM,drive,0,0,0,0,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x0A]; /* heads */ - xd_info[drive].cylinders = ((u_short *) (buf))[0x04]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); -#if 0 - xd_info[drive].rwrite = ((u_short *) (buf + 1))[0x05]; /* reduced write */ - xd_info[drive].precomp = ((u_short *) (buf + 1))[0x06]; /* write precomp */ - xd_info[drive].ecc = buf[0x0F]; /* ecc length */ -#endif /* 0 */ - xd_info[drive].control = 0; /* control byte */ - - xd_setparam(CMD_DTCSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,((u_short *) (buf + 1))[0x05],((u_short *) (buf + 1))[0x06],buf[0x0F]); - xd_build(cmdblk,CMD_DTCSETSTEP,drive,0,0,0,0,7); - if (xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2)) - printk("xd_dtc_init_drive: error setting step rate for xd%c\n", 'a'+drive); - } - else - printk("xd_dtc_init_drive: error reading geometry for xd%c\n", 'a'+drive); -} - -static void __init xd_wd_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xCA000: xd_iobase = 0x324; break; - case 0xCC000: xd_iobase = 0x328; break; - case 0xCE000: xd_iobase = 0x32C; break; - case 0xD0000: xd_iobase = 0x328; break; /* ? */ - case 0xD8000: xd_iobase = 0x32C; break; /* ? */ - default: printk("xd_wd_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x01; /* this one doesn't wrap properly either... */ - - outb(0,XD_RESET); /* reset the controller */ - - msleep(XD_INIT_DISK_DELAY); -} - -static void __init xd_wd_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS may be disabled */ - static u_short geometry_table[][4] = { - {0x264,4,0x1C2,0x1C2}, /* common part */ - {0x132,4,0x099,0x0}, - {0x267,2,0x1C2,0x1C2}, - {0x267,4,0x1C2,0x1C2}, - - {0x334,6,0x335,0x335}, /* 1004 series RLL */ - {0x30E,4,0x30F,0x3DC}, - {0x30E,2,0x30F,0x30F}, - {0x267,4,0x268,0x268}, - - {0x3D5,5,0x3D6,0x3D6}, /* 1002 series RLL */ - {0x3DB,7,0x3DC,0x3DC}, - {0x264,4,0x265,0x265}, - {0x267,4,0x268,0x268}}; - - u_char cmdblk[6],buf[0x200]; - u_char n = 0,rll,jumper_state,use_jumper_geo; - u_char wd_1002 = (xd_sigs[xd_type].string[7] == '6'); - - jumper_state = ~(inb(0x322)); - if (jumper_state & 0x40) - xd_irq = 9; - rll = (jumper_state & 0x30) ? (0x04 << wd_1002) : 0; - xd_build(cmdblk,CMD_READ,drive,0,0,0,1,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x1AF]; /* heads */ - xd_info[drive].cylinders = ((u_short *) (buf + 1))[0xD6]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); -#if 0 - xd_info[drive].rwrite = ((u_short *) (buf))[0xD8]; /* reduced write */ - xd_info[drive].wprecomp = ((u_short *) (buf))[0xDA]; /* write precomp */ - xd_info[drive].ecc = buf[0x1B4]; /* ecc length */ -#endif /* 0 */ - xd_info[drive].control = buf[0x1B5]; /* control byte */ - use_jumper_geo = !(xd_info[drive].heads) || !(xd_info[drive].cylinders); - if (xd_geo[3*drive]) { - xd_manual_geo_set(drive); - xd_info[drive].control = rll ? 7 : 5; - } - else if (use_jumper_geo) { - n = (((jumper_state & 0x0F) >> (drive << 1)) & 0x03) | rll; - xd_info[drive].cylinders = geometry_table[n][0]; - xd_info[drive].heads = (u_char)(geometry_table[n][1]); - xd_info[drive].control = rll ? 7 : 5; -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; - xd_info[drive].wprecomp = geometry_table[n][3]; - xd_info[drive].ecc = 0x0B; -#endif /* 0 */ - } - if (!wd_1002) { - if (use_jumper_geo) - xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders, - geometry_table[n][2],geometry_table[n][3],0x0B); - else - xd_setparam(CMD_WDSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders, - ((u_short *) (buf))[0xD8],((u_short *) (buf))[0xDA],buf[0x1B4]); - } - /* 1002 based RLL controller requests converted addressing, but reports physical - (physical 26 sec., logical 17 sec.) - 1004 based ???? */ - if (rll & wd_1002) { - if ((xd_info[drive].cylinders *= 26, - xd_info[drive].cylinders /= 17) > 1023) - xd_info[drive].cylinders = 1023; /* 1024 ? */ -#if 0 - xd_info[drive].rwrite *= 26; - xd_info[drive].rwrite /= 17; - xd_info[drive].wprecomp *= 26 - xd_info[drive].wprecomp /= 17; -#endif /* 0 */ - } - } - else - printk("xd_wd_init_drive: error reading geometry for xd%c\n",'a'+drive); - -} - -static void __init xd_seagate_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xD0000: xd_iobase = 0x324; break; - case 0xD8000: xd_iobase = 0x328; break; - case 0xE0000: xd_iobase = 0x32C; break; - default: printk("xd_seagate_init_controller: unsupported BIOS address %06x\n",address); - break; - } - xd_maxsectors = 0x40; - - outb(0,XD_RESET); /* reset the controller */ -} - -static void __init xd_seagate_init_drive (u_char drive) -{ - u_char cmdblk[6],buf[0x200]; - - xd_build(cmdblk,CMD_ST11GETGEOM,drive,0,0,0,1,0); - if (!xd_command(cmdblk,PIO_MODE,buf,NULL,NULL,XD_TIMEOUT * 2)) { - xd_info[drive].heads = buf[0x04]; /* heads */ - xd_info[drive].cylinders = (buf[0x02] << 8) | buf[0x03]; /* cylinders */ - xd_info[drive].sectors = buf[0x05]; /* sectors */ - xd_info[drive].control = 0; /* control byte */ - } - else - printk("xd_seagate_init_drive: error reading geometry from xd%c\n", 'a'+drive); -} - -/* Omti support courtesy Dirk Melchers */ -static void __init xd_omti_init_controller (unsigned int address) -{ - switch (address) { - case 0x00000: - case 0xC8000: break; /*initial: 0x320 */ - case 0xD0000: xd_iobase = 0x324; break; - case 0xD8000: xd_iobase = 0x328; break; - case 0xE0000: xd_iobase = 0x32C; break; - default: printk("xd_omti_init_controller: unsupported BIOS address %06x\n",address); - break; - } - - xd_maxsectors = 0x40; - - outb(0,XD_RESET); /* reset the controller */ -} - -static void __init xd_omti_init_drive (u_char drive) -{ - /* gets infos from drive */ - xd_override_init_drive(drive); - - /* set other parameters, Hardcoded, not that nice :-) */ - xd_info[drive].control = 2; -} - -/* Xebec support (AK) */ -static void __init xd_xebec_init_controller (unsigned int address) -{ -/* iobase may be set manually in range 0x300 - 0x33C - irq may be set manually to 2(9),3,4,5,6,7 - dma may be set manually to 1,2,3 - (How to detect them ???) -BIOS address may be set manually in range 0x0 - 0xF8000 -If you need non-standard settings use the xd=... command */ - - switch (address) { - case 0x00000: - case 0xC8000: /* initially: xd_iobase==0x320 */ - case 0xD0000: - case 0xD2000: - case 0xD4000: - case 0xD6000: - case 0xD8000: - case 0xDA000: - case 0xDC000: - case 0xDE000: - case 0xE0000: break; - default: printk("xd_xebec_init_controller: unsupported BIOS address %06x\n",address); - break; - } - - xd_maxsectors = 0x01; - outb(0,XD_RESET); /* reset the controller */ - - msleep(XD_INIT_DISK_DELAY); -} - -static void __init xd_xebec_init_drive (u_char drive) -{ - /* values from controller's BIOS - BIOS chip may be removed */ - static u_short geometry_table[][5] = { - {0x132,4,0x080,0x080,0x7}, - {0x132,4,0x080,0x080,0x17}, - {0x264,2,0x100,0x100,0x7}, - {0x264,2,0x100,0x100,0x17}, - {0x132,8,0x080,0x080,0x7}, - {0x132,8,0x080,0x080,0x17}, - {0x264,4,0x100,0x100,0x6}, - {0x264,4,0x100,0x100,0x17}, - {0x2BC,5,0x2BC,0x12C,0x6}, - {0x3A5,4,0x3A5,0x3A5,0x7}, - {0x26C,6,0x26C,0x26C,0x7}, - {0x200,8,0x200,0x100,0x17}, - {0x400,5,0x400,0x400,0x7}, - {0x400,6,0x400,0x400,0x7}, - {0x264,8,0x264,0x200,0x17}, - {0x33E,7,0x33E,0x200,0x7}}; - u_char n; - - n = inb(XD_JUMPER) & 0x0F; /* BIOS's drive number: same geometry - is assumed for BOTH drives */ - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else { - xd_info[drive].heads = (u_char)(geometry_table[n][1]); /* heads */ - xd_info[drive].cylinders = geometry_table[n][0]; /* cylinders */ - xd_info[drive].sectors = 17; /* sectors */ -#if 0 - xd_info[drive].rwrite = geometry_table[n][2]; /* reduced write */ - xd_info[drive].precomp = geometry_table[n][3] /* write precomp */ - xd_info[drive].ecc = 0x0B; /* ecc length */ -#endif /* 0 */ - } - xd_info[drive].control = geometry_table[n][4]; /* control byte */ - xd_setparam(CMD_XBSETPARAM,drive,xd_info[drive].heads,xd_info[drive].cylinders,geometry_table[n][2],geometry_table[n][3],0x0B); - xd_recalibrate(drive); -} - -/* xd_override_init_drive: this finds disk geometry in a "binary search" style, narrowing in on the "correct" number of heads - etc. by trying values until it gets the highest successful value. Idea courtesy Salvador Abreu (spa@fct.unl.pt). */ -static void __init xd_override_init_drive (u_char drive) -{ - u_short min[] = { 0,0,0 },max[] = { 16,1024,64 },test[] = { 0,0,0 }; - u_char cmdblk[6],i; - - if (xd_geo[3*drive]) - xd_manual_geo_set(drive); - else { - for (i = 0; i < 3; i++) { - while (min[i] != max[i] - 1) { - test[i] = (min[i] + max[i]) / 2; - xd_build(cmdblk,CMD_SEEK,drive,(u_char) test[0],(u_short) test[1],(u_char) test[2],0,0); - if (!xd_command(cmdblk,PIO_MODE,NULL,NULL,NULL,XD_TIMEOUT * 2)) - min[i] = test[i]; - else - max[i] = test[i]; - } - test[i] = min[i]; - } - xd_info[drive].heads = (u_char) min[0] + 1; - xd_info[drive].cylinders = (u_short) min[1] + 1; - xd_info[drive].sectors = (u_char) min[2] + 1; - } - xd_info[drive].control = 0; -} - -/* xd_setup: initialise controller from command line parameters */ -static void __init do_xd_setup (int *integers) -{ - switch (integers[0]) { - case 4: if (integers[4] < 0) - nodma = 1; - else if (integers[4] < 8) - xd_dma = integers[4]; - case 3: if ((integers[3] > 0) && (integers[3] <= 0x3FC)) - xd_iobase = integers[3]; - case 2: if ((integers[2] > 0) && (integers[2] < 16)) - xd_irq = integers[2]; - case 1: xd_override = 1; - if ((integers[1] >= 0) && (integers[1] < ARRAY_SIZE(xd_sigs))) - xd_type = integers[1]; - case 0: break; - default:printk("xd: too many parameters for xd\n"); - } - xd_maxsectors = 0x01; -} - -/* xd_setparam: set the drive characteristics */ -static void __init xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc) -{ - u_char cmdblk[14]; - - xd_build(cmdblk,command,drive,0,0,0,0,0); - cmdblk[6] = (u_char) (cylinders >> 8) & 0x03; - cmdblk[7] = (u_char) (cylinders & 0xFF); - cmdblk[8] = heads & 0x1F; - cmdblk[9] = (u_char) (rwrite >> 8) & 0x03; - cmdblk[10] = (u_char) (rwrite & 0xFF); - cmdblk[11] = (u_char) (wprecomp >> 8) & 0x03; - cmdblk[12] = (u_char) (wprecomp & 0xFF); - cmdblk[13] = ecc; - - /* Some controllers require geometry info as data, not command */ - - if (xd_command(cmdblk,PIO_MODE,NULL,&cmdblk[6],NULL,XD_TIMEOUT * 2)) - printk("xd: error setting characteristics for xd%c\n", 'a'+drive); -} - - -#ifdef MODULE - -module_param_array(xd, int, NULL, 0); -module_param_array(xd_geo, int, NULL, 0); -module_param(nodma, bool, 0); - -MODULE_LICENSE("GPL"); - -void cleanup_module(void) -{ - int i; - unregister_blkdev(XT_DISK_MAJOR, "xd"); - for (i = 0; i < xd_drives; i++) { - del_gendisk(xd_gendisk[i]); - put_disk(xd_gendisk[i]); - } - blk_cleanup_queue(xd_queue); - release_region(xd_iobase,4); - if (xd_drives) { - free_irq(xd_irq, NULL); - free_dma(xd_dma); - if (xd_dma_buffer) - xd_dma_mem_free((unsigned long)xd_dma_buffer, xd_maxsectors * 0x200); - } -} -#else - -static int __init xd_setup (char *str) -{ - int ints[5]; - get_options (str, ARRAY_SIZE (ints), ints); - do_xd_setup (ints); - return 1; -} - -/* xd_manual_geo_init: initialise drive geometry from command line parameters - (used only for WD drives) */ -static int __init xd_manual_geo_init (char *str) -{ - int i, integers[1 + 3*XD_MAXDRIVES]; - - get_options (str, ARRAY_SIZE (integers), integers); - if (integers[0]%3 != 0) { - printk("xd: incorrect number of parameters for xd_geo\n"); - return 1; - } - for (i = 0; (i < integers[0]) && (i < 3*XD_MAXDRIVES); i++) - xd_geo[i] = integers[i+1]; - return 1; -} - -__setup ("xd=", xd_setup); -__setup ("xd_geo=", xd_manual_geo_init); - -#endif /* MODULE */ - -module_init(xd_init); -MODULE_ALIAS_BLOCKDEV_MAJOR(XT_DISK_MAJOR); diff --git a/drivers/block/xd.h b/drivers/block/xd.h deleted file mode 100644 index 37cacef16e9..00000000000 --- a/drivers/block/xd.h +++ /dev/null @@ -1,134 +0,0 @@ -#ifndef _LINUX_XD_H -#define _LINUX_XD_H - -/* - * This file contains the definitions for the IO ports and errors etc. for XT hard disk controllers (at least the DTC 5150X). - * - * Author: Pat Mackinlay, pat@it.com.au - * Date: 29/09/92 - * - * Revised: 01/01/93, ... - * - * Ref: DTC 5150X Controller Specification (thanks to Kevin Fowler, kevinf@agora.rain.com) - * Also thanks to: Salvador Abreu, Dave Thaler, Risto Kankkunen and Wim Van Dorst. - */ - -#include <linux/interrupt.h> - -/* XT hard disk controller registers */ -#define XD_DATA (xd_iobase + 0x00) /* data RW register */ -#define XD_RESET (xd_iobase + 0x01) /* reset WO register */ -#define XD_STATUS (xd_iobase + 0x01) /* status RO register */ -#define XD_SELECT (xd_iobase + 0x02) /* select WO register */ -#define XD_JUMPER (xd_iobase + 0x02) /* jumper RO register */ -#define XD_CONTROL (xd_iobase + 0x03) /* DMAE/INTE WO register */ -#define XD_RESERVED (xd_iobase + 0x03) /* reserved */ - -/* XT hard disk controller commands (incomplete list) */ -#define CMD_TESTREADY 0x00 /* test drive ready */ -#define CMD_RECALIBRATE 0x01 /* recalibrate drive */ -#define CMD_SENSE 0x03 /* request sense */ -#define CMD_FORMATDRV 0x04 /* format drive */ -#define CMD_VERIFY 0x05 /* read verify */ -#define CMD_FORMATTRK 0x06 /* format track */ -#define CMD_FORMATBAD 0x07 /* format bad track */ -#define CMD_READ 0x08 /* read */ -#define CMD_WRITE 0x0A /* write */ -#define CMD_SEEK 0x0B /* seek */ - -/* Controller specific commands */ -#define CMD_DTCSETPARAM 0x0C /* set drive parameters (DTC 5150X & CX only?) */ -#define CMD_DTCGETECC 0x0D /* get ecc error length (DTC 5150X only?) */ -#define CMD_DTCREADBUF 0x0E /* read sector buffer (DTC 5150X only?) */ -#define CMD_DTCWRITEBUF 0x0F /* write sector buffer (DTC 5150X only?) */ -#define CMD_DTCREMAPTRK 0x11 /* assign alternate track (DTC 5150X only?) */ -#define CMD_DTCGETPARAM 0xFB /* get drive parameters (DTC 5150X only?) */ -#define CMD_DTCSETSTEP 0xFC /* set step rate (DTC 5150X only?) */ -#define CMD_DTCSETGEOM 0xFE /* set geometry data (DTC 5150X only?) */ -#define CMD_DTCGETGEOM 0xFF /* get geometry data (DTC 5150X only?) */ -#define CMD_ST11GETGEOM 0xF8 /* get geometry data (Seagate ST11R/M only?) */ -#define CMD_WDSETPARAM 0x0C /* set drive parameters (WD 1004A27X only?) */ -#define CMD_XBSETPARAM 0x0C /* set drive parameters (XEBEC only?) */ - -/* Bits for command status byte */ -#define CSB_ERROR 0x02 /* error */ -#define CSB_LUN 0x20 /* logical Unit Number */ - -/* XT hard disk controller status bits */ -#define STAT_READY 0x01 /* controller is ready */ -#define STAT_INPUT 0x02 /* data flowing from controller to host */ -#define STAT_COMMAND 0x04 /* controller in command phase */ -#define STAT_SELECT 0x08 /* controller is selected */ -#define STAT_REQUEST 0x10 /* controller requesting data */ -#define STAT_INTERRUPT 0x20 /* controller requesting interrupt */ - -/* XT hard disk controller control bits */ -#define PIO_MODE 0x00 /* control bits to set for PIO */ -#define DMA_MODE 0x03 /* control bits to set for DMA & interrupt */ - -#define XD_MAXDRIVES 2 /* maximum 2 drives */ -#define XD_TIMEOUT HZ /* 1 second timeout */ -#define XD_RETRIES 4 /* maximum 4 retries */ - -#undef DEBUG /* define for debugging output */ - -#ifdef DEBUG - #define DEBUG_STARTUP /* debug driver initialisation */ - #define DEBUG_OVERRIDE /* debug override geometry detection */ - #define DEBUG_READWRITE /* debug each read/write command */ - #define DEBUG_OTHER /* debug misc. interrupt/DMA stuff */ - #define DEBUG_COMMAND /* debug each controller command */ -#endif /* DEBUG */ - -/* this structure defines the XT drives and their types */ -typedef struct { - u_char heads; - u_short cylinders; - u_char sectors; - u_char control; - int unit; -} XD_INFO; - -/* this structure defines a ROM BIOS signature */ -typedef struct { - unsigned int offset; - const char *string; - void (*init_controller)(unsigned int address); - void (*init_drive)(u_char drive); - const char *name; -} XD_SIGNATURE; - -#ifndef MODULE -static int xd_manual_geo_init (char *command); -#endif /* MODULE */ -static u_char xd_detect (u_char *controller, unsigned int *address); -static u_char xd_initdrives (void (*init_drive)(u_char drive)); - -static void do_xd_request (struct request_queue * q); -static int xd_ioctl (struct block_device *bdev,fmode_t mode,unsigned int cmd,unsigned long arg); -static int xd_readwrite (u_char operation,XD_INFO *disk,char *buffer,u_int block,u_int count); -static void xd_recalibrate (u_char drive); - -static irqreturn_t xd_interrupt_handler(int irq, void *dev_id); -static u_char xd_setup_dma (u_char opcode,u_char *buffer,u_int count); -static u_char *xd_build (u_char *cmdblk,u_char command,u_char drive,u_char head,u_short cylinder,u_char sector,u_char count,u_char control); -static void xd_watchdog (unsigned long unused); -static inline u_char xd_waitport (u_short port,u_char flags,u_char mask,u_long timeout); -static u_int xd_command (u_char *command,u_char mode,u_char *indata,u_char *outdata,u_char *sense,u_long timeout); - -/* card specific setup and geometry gathering code */ -static void xd_dtc_init_controller (unsigned int address); -static void xd_dtc5150cx_init_drive (u_char drive); -static void xd_dtc_init_drive (u_char drive); -static void xd_wd_init_controller (unsigned int address); -static void xd_wd_init_drive (u_char drive); -static void xd_seagate_init_controller (unsigned int address); -static void xd_seagate_init_drive (u_char drive); -static void xd_omti_init_controller (unsigned int address); -static void xd_omti_init_drive (u_char drive); -static void xd_xebec_init_controller (unsigned int address); -static void xd_xebec_init_drive (u_char drive); -static void xd_setparam (u_char command,u_char drive,u_char heads,u_short cylinders,u_short rwrite,u_short wprecomp,u_char ecc); -static void xd_override_init_drive (u_char drive); - -#endif /* _LINUX_XD_H */ diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 5ac841ff6cc..de1f319f7bd 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -46,6 +46,7 @@ #include <xen/xen.h> #include <asm/xen/hypervisor.h> #include <asm/xen/hypercall.h> +#include <xen/balloon.h> #include "common.h" /* @@ -239,6 +240,7 @@ static void free_persistent_gnts(struct rb_root *root, unsigned int num) ret = gnttab_unmap_refs(unmap, NULL, pages, segs_to_unmap); BUG_ON(ret); + free_xenballooned_pages(segs_to_unmap, pages); segs_to_unmap = 0; } @@ -527,8 +529,8 @@ static int xen_blkbk_map(struct blkif_request *req, GFP_KERNEL); if (!persistent_gnt) return -ENOMEM; - persistent_gnt->page = alloc_page(GFP_KERNEL); - if (!persistent_gnt->page) { + if (alloc_xenballooned_pages(1, &persistent_gnt->page, + false)) { kfree(persistent_gnt); return -ENOMEM; } @@ -879,7 +881,6 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif, goto fail_response; } - preq.dev = req->u.rw.handle; preq.sector_number = req->u.rw.sector_number; preq.nr_sects = 0; diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 63980722db4..5e237f630c4 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -367,6 +367,7 @@ static int xen_blkbk_remove(struct xenbus_device *dev) be->blkif = NULL; } + kfree(be->mode); kfree(be); dev_set_drvdata(&dev->dev, NULL); return 0; @@ -502,6 +503,7 @@ static void backend_changed(struct xenbus_watch *watch, = container_of(watch, struct backend_info, backend_watch); struct xenbus_device *dev = be->dev; int cdrom = 0; + unsigned long handle; char *device_type; DPRINTK(""); @@ -521,10 +523,10 @@ static void backend_changed(struct xenbus_watch *watch, return; } - if ((be->major || be->minor) && - ((be->major != major) || (be->minor != minor))) { - pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", - be->major, be->minor, major, minor); + if (be->major | be->minor) { + if (be->major != major || be->minor != minor) + pr_warn(DRV_PFX "changing physical device (from %x:%x to %x:%x) not supported.\n", + be->major, be->minor, major, minor); return; } @@ -542,36 +544,33 @@ static void backend_changed(struct xenbus_watch *watch, kfree(device_type); } - if (be->major == 0 && be->minor == 0) { - /* Front end dir is a number, which is used as the handle. */ - - char *p = strrchr(dev->otherend, '/') + 1; - long handle; - err = strict_strtoul(p, 0, &handle); - if (err) - return; + /* Front end dir is a number, which is used as the handle. */ + err = strict_strtoul(strrchr(dev->otherend, '/') + 1, 0, &handle); + if (err) + return; - be->major = major; - be->minor = minor; + be->major = major; + be->minor = minor; - err = xen_vbd_create(be->blkif, handle, major, minor, - (NULL == strchr(be->mode, 'w')), cdrom); - if (err) { - be->major = 0; - be->minor = 0; - xenbus_dev_fatal(dev, err, "creating vbd structure"); - return; - } + err = xen_vbd_create(be->blkif, handle, major, minor, + !strchr(be->mode, 'w'), cdrom); + if (err) + xenbus_dev_fatal(dev, err, "creating vbd structure"); + else { err = xenvbd_sysfs_addif(dev); if (err) { xen_vbd_free(&be->blkif->vbd); - be->major = 0; - be->minor = 0; xenbus_dev_fatal(dev, err, "creating sysfs entries"); - return; } + } + if (err) { + kfree(be->mode); + be->mode = NULL; + be->major = 0; + be->minor = 0; + } else { /* We're potentially connected now */ xen_update_blkif_status(be->blkif); } diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 11043c18ac5..c3dae2e0f29 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -791,7 +791,7 @@ static void blkif_restart_queue(struct work_struct *work) static void blkif_free(struct blkfront_info *info, int suspend) { struct llist_node *all_gnts; - struct grant *persistent_gnt; + struct grant *persistent_gnt, *tmp; struct llist_node *n; /* Prevent new requests being issued until we fix things up. */ @@ -805,10 +805,17 @@ static void blkif_free(struct blkfront_info *info, int suspend) /* Remove all persistent grants */ if (info->persistent_gnts_c) { all_gnts = llist_del_all(&info->persistent_gnts); - llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) { + persistent_gnt = llist_entry(all_gnts, typeof(*(persistent_gnt)), node); + while (persistent_gnt) { gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); __free_page(pfn_to_page(persistent_gnt->pfn)); - kfree(persistent_gnt); + tmp = persistent_gnt; + n = persistent_gnt->node.next; + if (n) + persistent_gnt = llist_entry(n, typeof(*(persistent_gnt)), node); + else + persistent_gnt = NULL; + kfree(tmp); } info->persistent_gnts_c = 0; } diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index a4605fd7e30..47a673070d7 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -27,8 +27,10 @@ #include <linux/of_address.h> #include <linux/irq.h> #include <linux/module.h> -#include <asm/sched_clock.h> +#include <asm/sched_clock.h> +#include <asm/localtimer.h> +#include <linux/percpu.h> /* * Timer block registers. */ @@ -49,6 +51,7 @@ #define TIMER1_RELOAD_OFF 0x0018 #define TIMER1_VAL_OFF 0x001c +#define LCL_TIMER_EVENTS_STATUS 0x0028 /* Global timers are connected to the coherency fabric clock, and the below divider reduces their incrementing frequency. */ #define TIMER_DIVIDER_SHIFT 5 @@ -57,14 +60,17 @@ /* * SoC-specific data. */ -static void __iomem *timer_base; -static int timer_irq; +static void __iomem *timer_base, *local_base; +static unsigned int timer_clk; +static bool timer25Mhz = true; /* * Number of timer ticks per jiffy. */ static u32 ticks_per_jiffy; +static struct clock_event_device __percpu **percpu_armada_370_xp_evt; + static u32 notrace armada_370_xp_read_sched_clock(void) { return ~readl(timer_base + TIMER0_VAL_OFF); @@ -78,24 +84,23 @@ armada_370_xp_clkevt_next_event(unsigned long delta, struct clock_event_device *dev) { u32 u; - /* * Clear clockevent timer interrupt. */ - writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS); + writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS); /* * Setup new clockevent timer value. */ - writel(delta, timer_base + TIMER1_VAL_OFF); + writel(delta, local_base + TIMER0_VAL_OFF); /* * Enable the timer. */ - u = readl(timer_base + TIMER_CTRL_OFF); - u = ((u & ~TIMER1_RELOAD_EN) | TIMER1_EN | - TIMER1_DIV(TIMER_DIVIDER_SHIFT)); - writel(u, timer_base + TIMER_CTRL_OFF); + u = readl(local_base + TIMER_CTRL_OFF); + u = ((u & ~TIMER0_RELOAD_EN) | TIMER0_EN | + TIMER0_DIV(TIMER_DIVIDER_SHIFT)); + writel(u, local_base + TIMER_CTRL_OFF); return 0; } @@ -107,37 +112,38 @@ armada_370_xp_clkevt_mode(enum clock_event_mode mode, u32 u; if (mode == CLOCK_EVT_MODE_PERIODIC) { + /* * Setup timer to fire at 1/HZ intervals. */ - writel(ticks_per_jiffy - 1, timer_base + TIMER1_RELOAD_OFF); - writel(ticks_per_jiffy - 1, timer_base + TIMER1_VAL_OFF); + writel(ticks_per_jiffy - 1, local_base + TIMER0_RELOAD_OFF); + writel(ticks_per_jiffy - 1, local_base + TIMER0_VAL_OFF); /* * Enable timer. */ - u = readl(timer_base + TIMER_CTRL_OFF); - writel((u | TIMER1_EN | TIMER1_RELOAD_EN | - TIMER1_DIV(TIMER_DIVIDER_SHIFT)), - timer_base + TIMER_CTRL_OFF); + u = readl(local_base + TIMER_CTRL_OFF); + + writel((u | TIMER0_EN | TIMER0_RELOAD_EN | + TIMER0_DIV(TIMER_DIVIDER_SHIFT)), + local_base + TIMER_CTRL_OFF); } else { /* * Disable timer. */ - u = readl(timer_base + TIMER_CTRL_OFF); - writel(u & ~TIMER1_EN, timer_base + TIMER_CTRL_OFF); + u = readl(local_base + TIMER_CTRL_OFF); + writel(u & ~TIMER0_EN, local_base + TIMER_CTRL_OFF); /* * ACK pending timer interrupt. */ - writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS); - + writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS); } } static struct clock_event_device armada_370_xp_clkevt = { - .name = "armada_370_xp_tick", + .name = "armada_370_xp_per_cpu_tick", .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, .shift = 32, .rating = 300, @@ -150,32 +156,78 @@ static irqreturn_t armada_370_xp_timer_interrupt(int irq, void *dev_id) /* * ACK timer interrupt and call event handler. */ + struct clock_event_device *evt = *(struct clock_event_device **)dev_id; - writel(TIMER1_CLR_MASK, timer_base + TIMER_EVENTS_STATUS); - armada_370_xp_clkevt.event_handler(&armada_370_xp_clkevt); + writel(TIMER0_CLR_MASK, local_base + LCL_TIMER_EVENTS_STATUS); + evt->event_handler(evt); return IRQ_HANDLED; } -static struct irqaction armada_370_xp_timer_irq = { - .name = "armada_370_xp_tick", - .flags = IRQF_DISABLED | IRQF_TIMER, - .handler = armada_370_xp_timer_interrupt +/* + * Setup the local clock events for a CPU. + */ +static int __cpuinit armada_370_xp_timer_setup(struct clock_event_device *evt) +{ + u32 u; + int cpu = smp_processor_id(); + + /* Use existing clock_event for cpu 0 */ + if (!smp_processor_id()) + return 0; + + u = readl(local_base + TIMER_CTRL_OFF); + if (timer25Mhz) + writel(u | TIMER0_25MHZ, local_base + TIMER_CTRL_OFF); + else + writel(u & ~TIMER0_25MHZ, local_base + TIMER_CTRL_OFF); + + evt->name = armada_370_xp_clkevt.name; + evt->irq = armada_370_xp_clkevt.irq; + evt->features = armada_370_xp_clkevt.features; + evt->shift = armada_370_xp_clkevt.shift; + evt->rating = armada_370_xp_clkevt.rating, + evt->set_next_event = armada_370_xp_clkevt_next_event, + evt->set_mode = armada_370_xp_clkevt_mode, + evt->cpumask = cpumask_of(cpu); + + *__this_cpu_ptr(percpu_armada_370_xp_evt) = evt; + + clockevents_config_and_register(evt, timer_clk, 1, 0xfffffffe); + enable_percpu_irq(evt->irq, 0); + + return 0; +} + +static void armada_370_xp_timer_stop(struct clock_event_device *evt) +{ + evt->set_mode(CLOCK_EVT_MODE_UNUSED, evt); + disable_percpu_irq(evt->irq); +} + +static struct local_timer_ops armada_370_xp_local_timer_ops __cpuinitdata = { + .setup = armada_370_xp_timer_setup, + .stop = armada_370_xp_timer_stop, }; void __init armada_370_xp_timer_init(void) { u32 u; struct device_node *np; - unsigned int timer_clk; + int res; + np = of_find_compatible_node(NULL, NULL, "marvell,armada-370-xp-timer"); timer_base = of_iomap(np, 0); WARN_ON(!timer_base); + local_base = of_iomap(np, 1); if (of_find_property(np, "marvell,timer-25Mhz", NULL)) { /* The fixed 25MHz timer is available so let's use it */ + u = readl(local_base + TIMER_CTRL_OFF); + writel(u | TIMER0_25MHZ, + local_base + TIMER_CTRL_OFF); u = readl(timer_base + TIMER_CTRL_OFF); - writel(u | TIMER0_25MHZ | TIMER1_25MHZ, + writel(u | TIMER0_25MHZ, timer_base + TIMER_CTRL_OFF); timer_clk = 25000000; } else { @@ -183,15 +235,23 @@ void __init armada_370_xp_timer_init(void) struct clk *clk = of_clk_get(np, 0); WARN_ON(IS_ERR(clk)); rate = clk_get_rate(clk); + u = readl(local_base + TIMER_CTRL_OFF); + writel(u & ~(TIMER0_25MHZ), + local_base + TIMER_CTRL_OFF); + u = readl(timer_base + TIMER_CTRL_OFF); - writel(u & ~(TIMER0_25MHZ | TIMER1_25MHZ), + writel(u & ~(TIMER0_25MHZ), timer_base + TIMER_CTRL_OFF); + timer_clk = rate / TIMER_DIVIDER; + timer25Mhz = false; } - /* We use timer 0 as clocksource, and timer 1 for - clockevents */ - timer_irq = irq_of_parse_and_map(np, 1); + /* + * We use timer 0 as clocksource, and private(local) timer 0 + * for clockevents + */ + armada_370_xp_clkevt.irq = irq_of_parse_and_map(np, 4); ticks_per_jiffy = (timer_clk + HZ / 2) / HZ; @@ -216,12 +276,26 @@ void __init armada_370_xp_timer_init(void) "armada_370_xp_clocksource", timer_clk, 300, 32, clocksource_mmio_readl_down); - /* - * Setup clockevent timer (interrupt-driven). - */ - setup_irq(timer_irq, &armada_370_xp_timer_irq); + /* Register the clockevent on the private timer of CPU 0 */ armada_370_xp_clkevt.cpumask = cpumask_of(0); clockevents_config_and_register(&armada_370_xp_clkevt, timer_clk, 1, 0xfffffffe); -} + percpu_armada_370_xp_evt = alloc_percpu(struct clock_event_device *); + + + /* + * Setup clockevent timer (interrupt-driven). + */ + *__this_cpu_ptr(percpu_armada_370_xp_evt) = &armada_370_xp_clkevt; + res = request_percpu_irq(armada_370_xp_clkevt.irq, + armada_370_xp_timer_interrupt, + armada_370_xp_clkevt.name, + percpu_armada_370_xp_evt); + if (!res) { + enable_percpu_irq(armada_370_xp_clkevt.irq, 0); +#ifdef CONFIG_LOCAL_TIMERS + local_timer_register(&armada_370_xp_local_timer_ops); +#endif + } +} diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index acb709bfac0..e443f2c1dfd 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -80,6 +80,29 @@ config EDAC_MM_EDAC occurred so that a particular failing memory module can be replaced. If unsure, select 'Y'. +config EDAC_GHES + bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" + depends on ACPI_APEI_GHES && (EDAC_MM_EDAC=y) + default y + help + Not all machines support hardware-driven error report. Some of those + provide a BIOS-driven error report mechanism via ACPI, using the + APEI/GHES driver. By enabling this option, the error reports provided + by GHES are sent to userspace via the EDAC API. + + When this option is enabled, it will disable the hardware-driven + mechanisms, if a GHES BIOS is detected, entering into the + "Firmware First" mode. + + It should be noticed that keeping both GHES and a hardware-driven + error mechanism won't work well, as BIOS will race with OS, while + reading the error registers. So, if you want to not use "Firmware + first" GHES error mechanism, you should disable GHES either at + compilation time or by passing "ghes.disable=1" Kernel parameter + at boot time. + + In doubt, say 'Y'. + config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64) K8, F10h" depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 5608a9ba61b..4154ed6a02c 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -16,6 +16,7 @@ ifdef CONFIG_PCI edac_core-y += edac_pci.o edac_pci_sysfs.o endif +obj-$(CONFIG_EDAC_GHES) += ghes_edac.o obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o edac_mce_amd-y := mce_amd.o diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h index 23bb99fa44f..3c2625e7980 100644 --- a/drivers/edac/edac_core.h +++ b/drivers/edac/edac_core.h @@ -453,6 +453,11 @@ extern struct mem_ctl_info *find_mci_by_dev(struct device *dev); extern struct mem_ctl_info *edac_mc_del_mc(struct device *dev); extern int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page); + +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e); + void edac_mc_handle_error(const enum hw_event_mc_err_type type, struct mem_ctl_info *mci, const u16 error_count, diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index d1e9eb191f2..cdb81aa73ab 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -42,6 +42,12 @@ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); +/* + * Used to lock EDAC MC to just one module, avoiding two drivers e. g. + * apei/ghes and i7core_edac to be used at the same time. + */ +static void const *edac_mc_owner; + unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned len) { @@ -441,13 +447,6 @@ struct mem_ctl_info *edac_mc_alloc(unsigned mc_num, mci->op_state = OP_ALLOC; - /* at this point, the root kobj is valid, and in order to - * 'free' the object, then the function: - * edac_mc_unregister_sysfs_main_kobj() must be called - * which will perform kobj unregistration and the actual free - * will occur during the kobject callback operation - */ - return mci; error: @@ -666,9 +665,9 @@ fail1: return 1; } -static void del_mc_from_global_list(struct mem_ctl_info *mci) +static int del_mc_from_global_list(struct mem_ctl_info *mci) { - atomic_dec(&edac_handlers); + int handlers = atomic_dec_return(&edac_handlers); list_del_rcu(&mci->link); /* these are for safe removal of devices from global list while @@ -676,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) */ synchronize_rcu(); INIT_LIST_HEAD(&mci->link); + + return handlers; } /** @@ -719,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find); /* FIXME - should a warning be printed if no error detection? correction? */ int edac_mc_add_mc(struct mem_ctl_info *mci) { + int ret = -EINVAL; edac_dbg(0, "\n"); #ifdef CONFIG_EDAC_DEBUG @@ -749,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) #endif mutex_lock(&mem_ctls_mutex); + if (edac_mc_owner && edac_mc_owner != mci->mod_name) { + ret = -EPERM; + goto fail0; + } + if (add_mc_to_global_list(mci)) goto fail0; @@ -775,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); + edac_mc_owner = mci->mod_name; + mutex_unlock(&mem_ctls_mutex); return 0; @@ -783,7 +792,7 @@ fail1: fail0: mutex_unlock(&mem_ctls_mutex); - return 1; + return ret; } EXPORT_SYMBOL_GPL(edac_mc_add_mc); @@ -809,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) return NULL; } - del_mc_from_global_list(mci); + if (!del_mc_from_global_list(mci)) + edac_mc_owner = NULL; mutex_unlock(&mem_ctls_mutex); /* flush workq processes */ @@ -907,6 +917,7 @@ const char *edac_layer_name[] = { [EDAC_MC_LAYER_CHANNEL] = "channel", [EDAC_MC_LAYER_SLOT] = "slot", [EDAC_MC_LAYER_CHIP_SELECT] = "csrow", + [EDAC_MC_LAYER_ALL_MEM] = "memory", }; EXPORT_SYMBOL_GPL(edac_layer_name); @@ -1054,7 +1065,46 @@ static void edac_ue_error(struct mem_ctl_info *mci, edac_inc_ue_error(mci, enable_per_layer_report, pos, error_count); } -#define OTHER_LABEL " or " +/** + * edac_raw_mc_handle_error - reports a memory event to userspace without doing + * anything to discover the error location + * + * @type: severity of the error (CE/UE/Fatal) + * @mci: a struct mem_ctl_info pointer + * @e: error description + * + * This raw function is used internally by edac_mc_handle_error(). It should + * only be called directly when the hardware error come directly from BIOS, + * like in the case of APEI GHES driver. + */ +void edac_raw_mc_handle_error(const enum hw_event_mc_err_type type, + struct mem_ctl_info *mci, + struct edac_raw_error_desc *e) +{ + char detail[80]; + int pos[EDAC_MAX_LAYERS] = { e->top_layer, e->mid_layer, e->low_layer }; + + /* Memory type dependent details about the error */ + if (type == HW_EVENT_ERR_CORRECTED) { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", + e->page_frame_number, e->offset_in_page, + e->grain, e->syndrome); + edac_ce_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report, + e->page_frame_number, e->offset_in_page, e->grain); + } else { + snprintf(detail, sizeof(detail), + "page:0x%lx offset:0x%lx grain:%ld", + e->page_frame_number, e->offset_in_page, e->grain); + + edac_ue_error(mci, e->error_count, pos, e->msg, e->location, e->label, + detail, e->other_detail, e->enable_per_layer_report); + } + + +} +EXPORT_SYMBOL_GPL(edac_raw_mc_handle_error); /** * edac_mc_handle_error - reports a memory event to userspace @@ -1086,19 +1136,27 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, const char *msg, const char *other_detail) { - /* FIXME: too much for stack: move it to some pre-alocated area */ - char detail[80], location[80]; - char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms]; char *p; int row = -1, chan = -1; int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer }; - int i; - long grain; - bool enable_per_layer_report = false; + int i, n_labels = 0; u8 grain_bits; + struct edac_raw_error_desc *e = &mci->error_desc; edac_dbg(3, "MC%d\n", mci->mc_idx); + /* Fills the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = error_count; + e->top_layer = top_layer; + e->mid_layer = mid_layer; + e->low_layer = low_layer; + e->page_frame_number = page_frame_number; + e->offset_in_page = offset_in_page; + e->syndrome = syndrome; + e->msg = msg; + e->other_detail = other_detail; + /* * Check if the event report is consistent and if the memory * location is known. If it is known, enable_per_layer_report will be @@ -1121,7 +1179,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, pos[i] = -1; } if (pos[i] >= 0) - enable_per_layer_report = true; + e->enable_per_layer_report = true; } /* @@ -1135,8 +1193,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * where each memory belongs to a separate channel within the same * branch. */ - grain = 0; - p = label; + p = e->label; *p = '\0'; for (i = 0; i < mci->tot_dimms; i++) { @@ -1150,8 +1207,8 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, continue; /* get the max grain, over the error match range */ - if (dimm->grain > grain) - grain = dimm->grain; + if (dimm->grain > e->grain) + e->grain = dimm->grain; /* * If the error is memory-controller wide, there's no need to @@ -1159,8 +1216,13 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, * channel/memory controller/... may be affected. * Also, don't show errors for empty DIMM slots. */ - if (enable_per_layer_report && dimm->nr_pages) { - if (p != label) { + if (e->enable_per_layer_report && dimm->nr_pages) { + if (n_labels >= EDAC_MAX_LABELS) { + e->enable_per_layer_report = false; + break; + } + n_labels++; + if (p != e->label) { strcpy(p, OTHER_LABEL); p += strlen(OTHER_LABEL); } @@ -1187,12 +1249,12 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } } - if (!enable_per_layer_report) { - strcpy(label, "any memory"); + if (!e->enable_per_layer_report) { + strcpy(e->label, "any memory"); } else { edac_dbg(4, "csrow/channel to increment: (%d,%d)\n", row, chan); - if (p == label) - strcpy(label, "unknown memory"); + if (p == e->label) + strcpy(e->label, "unknown memory"); if (type == HW_EVENT_ERR_CORRECTED) { if (row >= 0) { mci->csrows[row]->ce_count += error_count; @@ -1205,7 +1267,7 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, } /* Fill the RAM location data */ - p = location; + p = e->location; for (i = 0; i < mci->n_layers; i++) { if (pos[i] < 0) @@ -1215,32 +1277,16 @@ void edac_mc_handle_error(const enum hw_event_mc_err_type type, edac_layer_name[mci->layers[i].type], pos[i]); } - if (p > location) + if (p > e->location) *(p - 1) = '\0'; /* Report the error via the trace interface */ - grain_bits = fls_long(grain) + 1; - trace_mc_event(type, msg, label, error_count, - mci->mc_idx, top_layer, mid_layer, low_layer, - PAGES_TO_MiB(page_frame_number) | offset_in_page, - grain_bits, syndrome, other_detail); + grain_bits = fls_long(e->grain) + 1; + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, e->other_detail); - /* Memory type dependent details about the error */ - if (type == HW_EVENT_ERR_CORRECTED) { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld syndrome:0x%lx", - page_frame_number, offset_in_page, - grain, syndrome); - edac_ce_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report, - page_frame_number, offset_in_page, grain); - } else { - snprintf(detail, sizeof(detail), - "page:0x%lx offset:0x%lx grain:%ld", - page_frame_number, offset_in_page, grain); - - edac_ue_error(mci, error_count, pos, msg, location, label, - detail, other_detail, enable_per_layer_report); - } + edac_raw_mc_handle_error(type, mci, e); } EXPORT_SYMBOL_GPL(edac_mc_handle_error); diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c index 0ca1ca71157..4f4b6137d74 100644 --- a/drivers/edac/edac_mc_sysfs.c +++ b/drivers/edac/edac_mc_sysfs.c @@ -7,7 +7,7 @@ * * Written Doug Thompson <norsk5@xmission.com> www.softwarebitmaker.com * - * (c) 2012 - Mauro Carvalho Chehab <mchehab@redhat.com> + * (c) 2012-2013 - Mauro Carvalho Chehab <mchehab@redhat.com> * The entire API were re-written, and ported to use struct device * */ @@ -429,8 +429,12 @@ static int edac_create_csrow_objects(struct mem_ctl_info *mci) if (!nr_pages_per_csrow(csrow)) continue; err = edac_create_csrow_object(mci, mci->csrows[i], i); - if (err < 0) + if (err < 0) { + edac_dbg(1, + "failure: create csrow objects for csrow %d\n", + i); goto error; + } } return 0; @@ -677,9 +681,6 @@ static ssize_t mci_sdram_scrub_rate_store(struct device *dev, unsigned long bandwidth = 0; int new_bw = 0; - if (!mci->set_sdram_scrub_rate) - return -ENODEV; - if (strict_strtoul(data, 10, &bandwidth) < 0) return -EINVAL; @@ -703,9 +704,6 @@ static ssize_t mci_sdram_scrub_rate_show(struct device *dev, struct mem_ctl_info *mci = to_mci(dev); int bandwidth = 0; - if (!mci->get_sdram_scrub_rate) - return -ENODEV; - bandwidth = mci->get_sdram_scrub_rate(mci); if (bandwidth < 0) { edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n"); @@ -866,8 +864,7 @@ DEVICE_ATTR(ce_count, S_IRUGO, mci_ce_count_show, NULL); DEVICE_ATTR(max_location, S_IRUGO, mci_max_location_show, NULL); /* memory scrubber attribute file */ -DEVICE_ATTR(sdram_scrub_rate, S_IRUGO | S_IWUSR, mci_sdram_scrub_rate_show, - mci_sdram_scrub_rate_store); +DEVICE_ATTR(sdram_scrub_rate, 0, NULL, NULL); static struct attribute *mci_attrs[] = { &dev_attr_reset_counters.attr, @@ -878,7 +875,6 @@ static struct attribute *mci_attrs[] = { &dev_attr_ce_noinfo_count.attr, &dev_attr_ue_count.attr, &dev_attr_ce_count.attr, - &dev_attr_sdram_scrub_rate.attr, &dev_attr_max_location.attr, NULL }; @@ -1007,11 +1003,28 @@ int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) edac_dbg(0, "creating device %s\n", dev_name(&mci->dev)); err = device_add(&mci->dev); if (err < 0) { + edac_dbg(1, "failure: create device %s\n", dev_name(&mci->dev)); bus_unregister(&mci->bus); kfree(mci->bus.name); return err; } + if (mci->set_sdram_scrub_rate || mci->get_sdram_scrub_rate) { + if (mci->get_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IRUGO; + dev_attr_sdram_scrub_rate.show = &mci_sdram_scrub_rate_show; + } + if (mci->set_sdram_scrub_rate) { + dev_attr_sdram_scrub_rate.attr.mode |= S_IWUSR; + dev_attr_sdram_scrub_rate.store = &mci_sdram_scrub_rate_store; + } + err = device_create_file(&mci->dev, + &dev_attr_sdram_scrub_rate); + if (err) { + edac_dbg(1, "failure: create sdram_scrub_rate\n"); + goto fail2; + } + } /* * Create the dimm/rank devices */ @@ -1056,6 +1069,7 @@ fail: continue; device_unregister(&dimm->dev); } +fail2: device_unregister(&mci->dev); bus_unregister(&mci->bus); kfree(mci->bus.name); diff --git a/drivers/edac/edac_module.c b/drivers/edac/edac_module.c index 12c951a2c33..a66941fea5a 100644 --- a/drivers/edac/edac_module.c +++ b/drivers/edac/edac_module.c @@ -146,7 +146,7 @@ static void __exit edac_exit(void) /* * Inform the kernel of our entry and exit points */ -module_init(edac_init); +subsys_initcall(edac_init); module_exit(edac_exit); MODULE_LICENSE("GPL"); diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index 0056c4dae9d..e8658e45176 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -429,8 +429,8 @@ static void edac_pci_main_kobj_teardown(void) if (atomic_dec_return(&edac_pci_sysfs_refcount) == 0) { edac_dbg(0, "called kobject_put on main kobj\n"); kobject_put(edac_pci_top_main_kobj); + edac_put_sysfs_subsys(); } - edac_put_sysfs_subsys(); } /* diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c new file mode 100644 index 00000000000..bb534670ec0 --- /dev/null +++ b/drivers/edac/ghes_edac.c @@ -0,0 +1,537 @@ +/* + * GHES/EDAC Linux driver + * + * This file may be distributed under the terms of the GNU General Public + * License version 2. + * + * Copyright (c) 2013 by Mauro Carvalho Chehab <mchehab@redhat.com> + * + * Red Hat Inc. http://www.redhat.com + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <acpi/ghes.h> +#include <linux/edac.h> +#include <linux/dmi.h> +#include "edac_core.h" +#include <ras/ras_event.h> + +#define GHES_EDAC_REVISION " Ver: 1.0.0" + +struct ghes_edac_pvt { + struct list_head list; + struct ghes *ghes; + struct mem_ctl_info *mci; + + /* Buffers for the error handling routine */ + char detail_location[240]; + char other_detail[160]; + char msg[80]; +}; + +static LIST_HEAD(ghes_reglist); +static DEFINE_MUTEX(ghes_edac_lock); +static int ghes_edac_mc_num; + + +/* Memory Device - Type 17 of SMBIOS spec */ +struct memdev_dmi_entry { + u8 type; + u8 length; + u16 handle; + u16 phys_mem_array_handle; + u16 mem_err_info_handle; + u16 total_width; + u16 data_width; + u16 size; + u8 form_factor; + u8 device_set; + u8 device_locator; + u8 bank_locator; + u8 memory_type; + u16 type_detail; + u16 speed; + u8 manufacturer; + u8 serial_number; + u8 asset_tag; + u8 part_number; + u8 attributes; + u32 extended_size; + u16 conf_mem_clk_speed; +} __attribute__((__packed__)); + +struct ghes_edac_dimm_fill { + struct mem_ctl_info *mci; + unsigned count; +}; + +char *memory_type[] = { + [MEM_EMPTY] = "EMPTY", + [MEM_RESERVED] = "RESERVED", + [MEM_UNKNOWN] = "UNKNOWN", + [MEM_FPM] = "FPM", + [MEM_EDO] = "EDO", + [MEM_BEDO] = "BEDO", + [MEM_SDR] = "SDR", + [MEM_RDR] = "RDR", + [MEM_DDR] = "DDR", + [MEM_RDDR] = "RDDR", + [MEM_RMBS] = "RMBS", + [MEM_DDR2] = "DDR2", + [MEM_FB_DDR2] = "FB_DDR2", + [MEM_RDDR2] = "RDDR2", + [MEM_XDR] = "XDR", + [MEM_DDR3] = "DDR3", + [MEM_RDDR3] = "RDDR3", +}; + +static void ghes_edac_count_dimms(const struct dmi_header *dh, void *arg) +{ + int *num_dimm = arg; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) + (*num_dimm)++; +} + +static void ghes_edac_dmidecode(const struct dmi_header *dh, void *arg) +{ + struct ghes_edac_dimm_fill *dimm_fill = arg; + struct mem_ctl_info *mci = dimm_fill->mci; + + if (dh->type == DMI_ENTRY_MEM_DEVICE) { + struct memdev_dmi_entry *entry = (struct memdev_dmi_entry *)dh; + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, + dimm_fill->count, 0, 0); + + if (entry->size == 0xffff) { + pr_info("Can't get DIMM%i size\n", + dimm_fill->count); + dimm->nr_pages = MiB_TO_PAGES(32);/* Unknown */ + } else if (entry->size == 0x7fff) { + dimm->nr_pages = MiB_TO_PAGES(entry->extended_size); + } else { + if (entry->size & 1 << 15) + dimm->nr_pages = MiB_TO_PAGES((entry->size & + 0x7fff) << 10); + else + dimm->nr_pages = MiB_TO_PAGES(entry->size); + } + + switch (entry->memory_type) { + case 0x12: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR; + else + dimm->mtype = MEM_DDR; + break; + case 0x13: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR2; + else + dimm->mtype = MEM_DDR2; + break; + case 0x14: + dimm->mtype = MEM_FB_DDR2; + break; + case 0x18: + if (entry->type_detail & 1 << 13) + dimm->mtype = MEM_RDDR3; + else + dimm->mtype = MEM_DDR3; + break; + default: + if (entry->type_detail & 1 << 6) + dimm->mtype = MEM_RMBS; + else if ((entry->type_detail & ((1 << 7) | (1 << 13))) + == ((1 << 7) | (1 << 13))) + dimm->mtype = MEM_RDR; + else if (entry->type_detail & 1 << 7) + dimm->mtype = MEM_SDR; + else if (entry->type_detail & 1 << 9) + dimm->mtype = MEM_EDO; + else + dimm->mtype = MEM_UNKNOWN; + } + + /* + * Actually, we can only detect if the memory has bits for + * checksum or not + */ + if (entry->total_width == entry->data_width) + dimm->edac_mode = EDAC_NONE; + else + dimm->edac_mode = EDAC_SECDED; + + dimm->dtype = DEV_UNKNOWN; + dimm->grain = 128; /* Likely, worse case */ + + /* + * FIXME: It shouldn't be hard to also fill the DIMM labels + */ + + if (dimm->nr_pages) { + edac_dbg(1, "DIMM%i: %s size = %d MB%s\n", + dimm_fill->count, memory_type[dimm->mtype], + PAGES_TO_MiB(dimm->nr_pages), + (dimm->edac_mode != EDAC_NONE) ? "(ECC)" : ""); + edac_dbg(2, "\ttype %d, detail 0x%02x, width %d(total %d)\n", + entry->memory_type, entry->type_detail, + entry->total_width, entry->data_width); + } + + dimm_fill->count++; + } +} + +void ghes_edac_report_mem_error(struct ghes *ghes, int sev, + struct cper_sec_mem_err *mem_err) +{ + enum hw_event_mc_err_type type; + struct edac_raw_error_desc *e; + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt = NULL; + char *p; + u8 grain_bits; + + list_for_each_entry(pvt, &ghes_reglist, list) { + if (ghes == pvt->ghes) + break; + } + if (!pvt) { + pr_err("Internal error: Can't find EDAC structure\n"); + return; + } + mci = pvt->mci; + e = &mci->error_desc; + + /* Cleans the error report buffer */ + memset(e, 0, sizeof (*e)); + e->error_count = 1; + strcpy(e->label, "unknown label"); + e->msg = pvt->msg; + e->other_detail = pvt->other_detail; + e->top_layer = -1; + e->mid_layer = -1; + e->low_layer = -1; + *pvt->other_detail = '\0'; + *pvt->msg = '\0'; + + switch (sev) { + case GHES_SEV_CORRECTED: + type = HW_EVENT_ERR_CORRECTED; + break; + case GHES_SEV_RECOVERABLE: + type = HW_EVENT_ERR_UNCORRECTED; + break; + case GHES_SEV_PANIC: + type = HW_EVENT_ERR_FATAL; + break; + default: + case GHES_SEV_NO: + type = HW_EVENT_ERR_INFO; + } + + edac_dbg(1, "error validation_bits: 0x%08llx\n", + (long long)mem_err->validation_bits); + + /* Error type, mapped on e->msg */ + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { + p = pvt->msg; + switch (mem_err->error_type) { + case 0: + p += sprintf(p, "Unknown"); + break; + case 1: + p += sprintf(p, "No error"); + break; + case 2: + p += sprintf(p, "Single-bit ECC"); + break; + case 3: + p += sprintf(p, "Multi-bit ECC"); + break; + case 4: + p += sprintf(p, "Single-symbol ChipKill ECC"); + break; + case 5: + p += sprintf(p, "Multi-symbol ChipKill ECC"); + break; + case 6: + p += sprintf(p, "Master abort"); + break; + case 7: + p += sprintf(p, "Target abort"); + break; + case 8: + p += sprintf(p, "Parity Error"); + break; + case 9: + p += sprintf(p, "Watchdog timeout"); + break; + case 10: + p += sprintf(p, "Invalid address"); + break; + case 11: + p += sprintf(p, "Mirror Broken"); + break; + case 12: + p += sprintf(p, "Memory Sparing"); + break; + case 13: + p += sprintf(p, "Scrub corrected error"); + break; + case 14: + p += sprintf(p, "Scrub uncorrected error"); + break; + case 15: + p += sprintf(p, "Physical Memory Map-out event"); + break; + default: + p += sprintf(p, "reserved error (%d)", + mem_err->error_type); + } + } else { + strcpy(pvt->msg, "unknown error"); + } + + /* Error address */ + if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS) { + e->page_frame_number = mem_err->physical_addr >> PAGE_SHIFT; + e->offset_in_page = mem_err->physical_addr & ~PAGE_MASK; + } + + /* Error grain */ + if (mem_err->validation_bits & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK) { + e->grain = ~(mem_err->physical_addr_mask & ~PAGE_MASK); + } + + /* Memory error location, mapped on e->location */ + p = e->location; + if (mem_err->validation_bits & CPER_MEM_VALID_NODE) + p += sprintf(p, "node:%d ", mem_err->node); + if (mem_err->validation_bits & CPER_MEM_VALID_CARD) + p += sprintf(p, "card:%d ", mem_err->card); + if (mem_err->validation_bits & CPER_MEM_VALID_MODULE) + p += sprintf(p, "module:%d ", mem_err->module); + if (mem_err->validation_bits & CPER_MEM_VALID_BANK) + p += sprintf(p, "bank:%d ", mem_err->bank); + if (mem_err->validation_bits & CPER_MEM_VALID_ROW) + p += sprintf(p, "row:%d ", mem_err->row); + if (mem_err->validation_bits & CPER_MEM_VALID_COLUMN) + p += sprintf(p, "col:%d ", mem_err->column); + if (mem_err->validation_bits & CPER_MEM_VALID_BIT_POSITION) + p += sprintf(p, "bit_pos:%d ", mem_err->bit_pos); + if (p > e->location) + *(p - 1) = '\0'; + + /* All other fields are mapped on e->other_detail */ + p = pvt->other_detail; + if (mem_err->validation_bits & CPER_MEM_VALID_ERROR_STATUS) { + u64 status = mem_err->error_status; + + p += sprintf(p, "status(0x%016llx): ", (long long)status); + switch ((status >> 8) & 0xff) { + case 1: + p += sprintf(p, "Error detected internal to the component "); + break; + case 16: + p += sprintf(p, "Error detected in the bus "); + break; + case 4: + p += sprintf(p, "Storage error in DRAM memory "); + break; + case 5: + p += sprintf(p, "Storage error in TLB "); + break; + case 6: + p += sprintf(p, "Storage error in cache "); + break; + case 7: + p += sprintf(p, "Error in one or more functional units "); + break; + case 8: + p += sprintf(p, "component failed self test "); + break; + case 9: + p += sprintf(p, "Overflow or undervalue of internal queue "); + break; + case 17: + p += sprintf(p, "Virtual address not found on IO-TLB or IO-PDIR "); + break; + case 18: + p += sprintf(p, "Improper access error "); + break; + case 19: + p += sprintf(p, "Access to a memory address which is not mapped to any component "); + break; + case 20: + p += sprintf(p, "Loss of Lockstep "); + break; + case 21: + p += sprintf(p, "Response not associated with a request "); + break; + case 22: + p += sprintf(p, "Bus parity error - must also set the A, C, or D Bits "); + break; + case 23: + p += sprintf(p, "Detection of a PATH_ERROR "); + break; + case 25: + p += sprintf(p, "Bus operation timeout "); + break; + case 26: + p += sprintf(p, "A read was issued to data that has been poisoned "); + break; + default: + p += sprintf(p, "reserved "); + break; + } + } + if (mem_err->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) + p += sprintf(p, "requestorID: 0x%016llx ", + (long long)mem_err->requestor_id); + if (mem_err->validation_bits & CPER_MEM_VALID_RESPONDER_ID) + p += sprintf(p, "responderID: 0x%016llx ", + (long long)mem_err->responder_id); + if (mem_err->validation_bits & CPER_MEM_VALID_TARGET_ID) + p += sprintf(p, "targetID: 0x%016llx ", + (long long)mem_err->responder_id); + if (p > pvt->other_detail) + *(p - 1) = '\0'; + + /* Generate the trace event */ + grain_bits = fls_long(e->grain); + sprintf(pvt->detail_location, "APEI location: %s %s", + e->location, e->other_detail); + trace_mc_event(type, e->msg, e->label, e->error_count, + mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, + PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, + grain_bits, e->syndrome, pvt->detail_location); + + /* Report the error via EDAC API */ + edac_raw_mc_handle_error(type, mci, e); +} +EXPORT_SYMBOL_GPL(ghes_edac_report_mem_error); + +int ghes_edac_register(struct ghes *ghes, struct device *dev) +{ + bool fake = false; + int rc, num_dimm = 0; + struct mem_ctl_info *mci; + struct edac_mc_layer layers[1]; + struct ghes_edac_pvt *pvt; + struct ghes_edac_dimm_fill dimm_fill; + + /* Get the number of DIMMs */ + dmi_walk(ghes_edac_count_dimms, &num_dimm); + + /* Check if we've got a bogus BIOS */ + if (num_dimm == 0) { + fake = true; + num_dimm = 1; + } + + layers[0].type = EDAC_MC_LAYER_ALL_MEM; + layers[0].size = num_dimm; + layers[0].is_virt_csrow = true; + + /* + * We need to serialize edac_mc_alloc() and edac_mc_add_mc(), + * to avoid duplicated memory controller numbers + */ + mutex_lock(&ghes_edac_lock); + mci = edac_mc_alloc(ghes_edac_mc_num, ARRAY_SIZE(layers), layers, + sizeof(*pvt)); + if (!mci) { + pr_info("Can't allocate memory for EDAC data\n"); + mutex_unlock(&ghes_edac_lock); + return -ENOMEM; + } + + pvt = mci->pvt_info; + memset(pvt, 0, sizeof(*pvt)); + list_add_tail(&pvt->list, &ghes_reglist); + pvt->ghes = ghes; + pvt->mci = mci; + mci->pdev = dev; + + mci->mtype_cap = MEM_FLAG_EMPTY; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "ghes_edac.c"; + mci->mod_ver = GHES_EDAC_REVISION; + mci->ctl_name = "ghes_edac"; + mci->dev_name = "ghes"; + + if (!ghes_edac_mc_num) { + if (!fake) { + pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n"); + pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n"); + pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); + pr_info("If you find incorrect reports, please contact your hardware vendor\n"); + pr_info("to correct its BIOS.\n"); + pr_info("This system has %d DIMM sockets.\n", + num_dimm); + } else { + pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n"); + pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n"); + pr_info("work on such system. Use this driver with caution\n"); + } + } + + if (!fake) { + /* + * Fill DIMM info from DMI for the memory controller #0 + * + * Keep it in blank for the other memory controllers, as + * there's no reliable way to properly credit each DIMM to + * the memory controller, as different BIOSes fill the + * DMI bank location fields on different ways + */ + if (!ghes_edac_mc_num) { + dimm_fill.count = 0; + dimm_fill.mci = mci; + dmi_walk(ghes_edac_dmidecode, &dimm_fill); + } + } else { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, 0, 0, 0); + + dimm->nr_pages = 1; + dimm->grain = 128; + dimm->mtype = MEM_UNKNOWN; + dimm->dtype = DEV_UNKNOWN; + dimm->edac_mode = EDAC_SECDED; + } + + rc = edac_mc_add_mc(mci); + if (rc < 0) { + pr_info("Can't register at EDAC core\n"); + edac_mc_free(mci); + mutex_unlock(&ghes_edac_lock); + return -ENODEV; + } + + ghes_edac_mc_num++; + mutex_unlock(&ghes_edac_lock); + return 0; +} +EXPORT_SYMBOL_GPL(ghes_edac_register); + +void ghes_edac_unregister(struct ghes *ghes) +{ + struct mem_ctl_info *mci; + struct ghes_edac_pvt *pvt, *tmp; + + list_for_each_entry_safe(pvt, tmp, &ghes_reglist, list) { + if (ghes == pvt->ghes) { + mci = pvt->mci; + edac_mc_del_mc(mci->pdev); + edac_mc_free(mci); + list_del(&pvt->list); + } + } +} +EXPORT_SYMBOL_GPL(ghes_edac_unregister); diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index 4e8337602e7..aa44c1718f5 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -106,16 +106,26 @@ static int nr_channels; static int how_many_channels(struct pci_dev *pdev) { + int n_channels; + unsigned char capid0_8b; /* 8th byte of CAPID0 */ pci_read_config_byte(pdev, I3200_CAPID0 + 8, &capid0_8b); + if (capid0_8b & 0x20) { /* check DCD: Dual Channel Disable */ edac_dbg(0, "In single channel mode\n"); - return 1; + n_channels = 1; } else { edac_dbg(0, "In dual channel mode\n"); - return 2; + n_channels = 2; } + + if (capid0_8b & 0x10) /* check if both channels are filled */ + edac_dbg(0, "2 DIMMS per channel disabled\n"); + else + edac_dbg(0, "2 DIMMS per channel enabled\n"); + + return n_channels; } static unsigned long eccerrlog_syndrome(u64 log) @@ -290,6 +300,8 @@ static void i3200_get_drbs(void __iomem *window, for (i = 0; i < I3200_RANKS_PER_CHANNEL; i++) { drbs[0][i] = readw(window + I3200_C0DRB + 2*i) & I3200_DRB_MASK; drbs[1][i] = readw(window + I3200_C1DRB + 2*i) & I3200_DRB_MASK; + + edac_dbg(0, "drb[0][%d] = %d, drb[1][%d] = %d\n", i, drbs[0][i], i, drbs[1][i]); } } @@ -311,6 +323,9 @@ static unsigned long drb_to_nr_pages( int n; n = drbs[channel][rank]; + if (!n) + return 0; + if (rank > 0) n -= drbs[channel][rank - 1]; if (stacked && (channel == 1) && @@ -377,19 +392,19 @@ static int i3200_probe1(struct pci_dev *pdev, int dev_idx) * cumulative; the last one will contain the total memory * contained in all ranks. */ - for (i = 0; i < mci->nr_csrows; i++) { + for (i = 0; i < I3200_DIMMS; i++) { unsigned long nr_pages; - struct csrow_info *csrow = mci->csrows[i]; - nr_pages = drb_to_nr_pages(drbs, stacked, - i / I3200_RANKS_PER_CHANNEL, - i % I3200_RANKS_PER_CHANNEL); + for (j = 0; j < nr_channels; j++) { + struct dimm_info *dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, i, j, 0); - if (nr_pages == 0) - continue; + nr_pages = drb_to_nr_pages(drbs, stacked, j, i); + if (nr_pages == 0) + continue; - for (j = 0; j < nr_channels; j++) { - struct dimm_info *dimm = csrow->channels[j]->dimm; + edac_dbg(0, "csrow %d, channel %d%s, size = %ld Mb\n", i, j, + stacked ? " (stacked)" : "", PAGES_TO_MiB(nr_pages)); dimm->nr_pages = nr_pages; dimm->grain = nr_pages << PAGE_SHIFT; diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c index d6955b2cc99..1b635178cc4 100644 --- a/drivers/edac/i5100_edac.c +++ b/drivers/edac/i5100_edac.c @@ -27,6 +27,7 @@ #include <linux/edac.h> #include <linux/delay.h> #include <linux/mmzone.h> +#include <linux/debugfs.h> #include "edac_core.h" @@ -68,6 +69,14 @@ I5100_FERR_NF_MEM_M1ERR_MASK) #define I5100_NERR_NF_MEM 0xa4 /* MC Next Non-Fatal Errors */ #define I5100_EMASK_MEM 0xa8 /* MC Error Mask Register */ +#define I5100_MEM0EINJMSK0 0x200 /* Injection Mask0 Register Channel 0 */ +#define I5100_MEM1EINJMSK0 0x208 /* Injection Mask0 Register Channel 1 */ +#define I5100_MEMXEINJMSK0_EINJEN (1 << 27) +#define I5100_MEM0EINJMSK1 0x204 /* Injection Mask1 Register Channel 0 */ +#define I5100_MEM1EINJMSK1 0x206 /* Injection Mask1 Register Channel 1 */ + +/* Device 19, Function 0 */ +#define I5100_DINJ0 0x9a /* device 21 and 22, func 0 */ #define I5100_MTR_0 0x154 /* Memory Technology Registers 0-3 */ @@ -338,13 +347,26 @@ struct i5100_priv { unsigned ranksperchan; /* number of ranks per channel */ struct pci_dev *mc; /* device 16 func 1 */ + struct pci_dev *einj; /* device 19 func 0 */ struct pci_dev *ch0mm; /* device 21 func 0 */ struct pci_dev *ch1mm; /* device 22 func 0 */ struct delayed_work i5100_scrubbing; int scrub_enable; + + /* Error injection */ + u8 inject_channel; + u8 inject_hlinesel; + u8 inject_deviceptr1; + u8 inject_deviceptr2; + u16 inject_eccmask1; + u16 inject_eccmask2; + + struct dentry *debugfs; }; +static struct dentry *i5100_debugfs; + /* map a rank/chan to a slot number on the mainboard */ static int i5100_rank_to_slot(const struct mem_ctl_info *mci, int chan, int rank) @@ -863,13 +885,126 @@ static void i5100_init_csrows(struct mem_ctl_info *mci) } } +/**************************************************************************** + * Error injection routines + ****************************************************************************/ + +static void i5100_do_inject(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + u32 mask0; + u16 mask1; + + /* MEM[1:0]EINJMSK0 + * 31 - ADDRMATCHEN + * 29:28 - HLINESEL + * 00 Reserved + * 01 Lower half of cache line + * 10 Upper half of cache line + * 11 Both upper and lower parts of cache line + * 27 - EINJEN + * 25:19 - XORMASK1 for deviceptr1 + * 9:5 - SEC2RAM for deviceptr2 + * 4:0 - FIR2RAM for deviceptr1 + */ + mask0 = ((priv->inject_hlinesel & 0x3) << 28) | + I5100_MEMXEINJMSK0_EINJEN | + ((priv->inject_eccmask1 & 0xffff) << 10) | + ((priv->inject_deviceptr2 & 0x1f) << 5) | + (priv->inject_deviceptr1 & 0x1f); + + /* MEM[1:0]EINJMSK1 + * 15:0 - XORMASK2 for deviceptr2 + */ + mask1 = priv->inject_eccmask2; + + if (priv->inject_channel == 0) { + pci_write_config_dword(priv->mc, I5100_MEM0EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM0EINJMSK1, mask1); + } else { + pci_write_config_dword(priv->mc, I5100_MEM1EINJMSK0, mask0); + pci_write_config_word(priv->mc, I5100_MEM1EINJMSK1, mask1); + } + + /* Error Injection Response Function + * Intel 5100 Memory Controller Hub Chipset (318378) datasheet + * hints about this register but carry no data about them. All + * data regarding device 19 is based on experimentation and the + * Intel 7300 Chipset Memory Controller Hub (318082) datasheet + * which appears to be accurate for the i5100 in this area. + * + * The injection code don't work without setting this register. + * The register needs to be flipped off then on else the hardware + * will only preform the first injection. + * + * Stop condition bits 7:4 + * 1010 - Stop after one injection + * 1011 - Never stop injecting faults + * + * Start condition bits 3:0 + * 1010 - Never start + * 1011 - Start immediately + */ + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xaa); + pci_write_config_byte(priv->einj, I5100_DINJ0, 0xab); +} + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) +static ssize_t inject_enable_write(struct file *file, const char __user *data, + size_t count, loff_t *ppos) +{ + struct device *dev = file->private_data; + struct mem_ctl_info *mci = to_mci(dev); + + i5100_do_inject(mci); + + return count; +} + +static const struct file_operations i5100_inject_enable_fops = { + .open = simple_open, + .write = inject_enable_write, + .llseek = generic_file_llseek, +}; + +static int i5100_setup_debugfs(struct mem_ctl_info *mci) +{ + struct i5100_priv *priv = mci->pvt_info; + + if (!i5100_debugfs) + return -ENODEV; + + priv->debugfs = debugfs_create_dir(mci->bus.name, i5100_debugfs); + + if (!priv->debugfs) + return -ENOMEM; + + debugfs_create_x8("inject_channel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_channel); + debugfs_create_x8("inject_hlinesel", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_hlinesel); + debugfs_create_x8("inject_deviceptr1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr1); + debugfs_create_x8("inject_deviceptr2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_deviceptr2); + debugfs_create_x16("inject_eccmask1", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask1); + debugfs_create_x16("inject_eccmask2", S_IRUGO | S_IWUSR, priv->debugfs, + &priv->inject_eccmask2); + debugfs_create_file("inject_enable", S_IWUSR, priv->debugfs, + &mci->dev, &i5100_inject_enable_fops); + + return 0; + +} + static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { int rc; struct mem_ctl_info *mci; struct edac_mc_layer layers[2]; struct i5100_priv *priv; - struct pci_dev *ch0mm, *ch1mm; + struct pci_dev *ch0mm, *ch1mm, *einj; int ret = 0; u32 dw; int ranksperch; @@ -941,6 +1076,22 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto bail_disable_ch1; } + + /* device 19, func 0, Error injection */ + einj = pci_get_device_func(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_5100_19, 0); + if (!einj) { + ret = -ENODEV; + goto bail_einj; + } + + rc = pci_enable_device(einj); + if (rc < 0) { + ret = rc; + goto bail_disable_einj; + } + + mci->pdev = &pdev->dev; priv = mci->pvt_info; @@ -948,6 +1099,7 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) priv->mc = pdev; priv->ch0mm = ch0mm; priv->ch1mm = ch1mm; + priv->einj = einj; INIT_DELAYED_WORK(&(priv->i5100_scrubbing), i5100_refresh_scrubbing); @@ -975,6 +1127,13 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) mci->set_sdram_scrub_rate = i5100_set_scrub_rate; mci->get_sdram_scrub_rate = i5100_get_scrub_rate; + priv->inject_channel = 0; + priv->inject_hlinesel = 0; + priv->inject_deviceptr1 = 0; + priv->inject_deviceptr2 = 0; + priv->inject_eccmask1 = 0; + priv->inject_eccmask2 = 0; + i5100_init_csrows(mci); /* this strange construction seems to be in every driver, dunno why */ @@ -992,6 +1151,8 @@ static int i5100_init_one(struct pci_dev *pdev, const struct pci_device_id *id) goto bail_scrub; } + i5100_setup_debugfs(mci); + return ret; bail_scrub: @@ -999,6 +1160,12 @@ bail_scrub: cancel_delayed_work_sync(&(priv->i5100_scrubbing)); edac_mc_free(mci); +bail_disable_einj: + pci_disable_device(einj); + +bail_einj: + pci_dev_put(einj); + bail_disable_ch1: pci_disable_device(ch1mm); @@ -1030,14 +1197,18 @@ static void i5100_remove_one(struct pci_dev *pdev) priv = mci->pvt_info; + debugfs_remove_recursive(priv->debugfs); + priv->scrub_enable = 0; cancel_delayed_work_sync(&(priv->i5100_scrubbing)); pci_disable_device(pdev); pci_disable_device(priv->ch0mm); pci_disable_device(priv->ch1mm); + pci_disable_device(priv->einj); pci_dev_put(priv->ch0mm); pci_dev_put(priv->ch1mm); + pci_dev_put(priv->einj); edac_mc_free(mci); } @@ -1060,13 +1231,16 @@ static int __init i5100_init(void) { int pci_rc; - pci_rc = pci_register_driver(&i5100_driver); + i5100_debugfs = debugfs_create_dir("i5100_edac", NULL); + pci_rc = pci_register_driver(&i5100_driver); return (pci_rc < 0) ? pci_rc : 0; } static void __exit i5100_exit(void) { + debugfs_remove(i5100_debugfs); + pci_unregister_driver(&i5100_driver); } diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index e213d030b0d..0ec3e95a12c 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -420,21 +420,21 @@ static inline int numdimms(u32 dimms) static inline int numrank(u32 rank) { - static int ranks[4] = { 1, 2, 4, -EINVAL }; + static const int ranks[] = { 1, 2, 4, -EINVAL }; return ranks[rank & 0x3]; } static inline int numbank(u32 bank) { - static int banks[4] = { 4, 8, 16, -EINVAL }; + static const int banks[] = { 4, 8, 16, -EINVAL }; return banks[bank & 0x3]; } static inline int numrow(u32 row) { - static int rows[8] = { + static const int rows[] = { 1 << 12, 1 << 13, 1 << 14, 1 << 15, 1 << 16, -EINVAL, -EINVAL, -EINVAL, }; @@ -444,7 +444,7 @@ static inline int numrow(u32 row) static inline int numcol(u32 col) { - static int cols[8] = { + static const int cols[] = { 1 << 10, 1 << 11, 1 << 12, -EINVAL, }; return cols[col & 0x3]; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index da7e2986e3d..57244f99561 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -639,7 +639,7 @@ static void get_memory_layout(const struct mem_ctl_info *mci) tmp_mb = (1 + pvt->tohm) >> 20; mb = div_u64_rem(tmp_mb, 1000, &kb); - edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)", mb, kb, (u64)pvt->tohm); + edac_dbg(0, "TOHM: %u.%03u GB (0x%016Lx)\n", mb, kb, (u64)pvt->tohm); /* * Step 2) Get SAD range and SAD Interleave list diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e67a4be0080..bb2cd3ce9b0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -626,7 +626,6 @@ static void dec_pending(struct dm_io *io, int error) queue_io(md, bio); } else { /* done with normal IO or empty flush */ - trace_block_bio_complete(md->queue, bio, io_error); bio_endio(bio, io_error); } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 697f026cb31..5af2d270908 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -184,8 +184,6 @@ static void return_io(struct bio *return_bi) return_bi = bi->bi_next; bi->bi_next = NULL; bi->bi_size = 0; - trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), - bi, 0); bio_endio(bi, 0); bi = return_bi; } @@ -3916,8 +3914,6 @@ static void raid5_align_endio(struct bio *bi, int error) rdev_dec_pending(rdev, conf->mddev); if (!error && uptodate) { - trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev), - raid_bi, 0); bio_endio(raid_bi, 0); if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); @@ -4376,8 +4372,6 @@ static void make_request(struct mddev *mddev, struct bio * bi) if ( rw == WRITE ) md_write_end(mddev); - trace_block_bio_complete(bdev_get_queue(bi->bi_bdev), - bi, 0); bio_endio(bi, 0); } } @@ -4754,11 +4748,8 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) handled++; } remaining = raid5_dec_bi_active_stripes(raid_bio); - if (remaining == 0) { - trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev), - raid_bio, 0); + if (remaining == 0) bio_endio(raid_bio, 0); - } if (atomic_dec_and_test(&conf->active_aligned_reads)) wake_up(&conf->wait_for_stripe); return handled; diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 383a727b8aa..6e5ad8ec0a2 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -1338,28 +1338,15 @@ static int isp_enable_clocks(struct isp_device *isp) { int r; unsigned long rate; - int divisor; - - /* - * cam_mclk clock chain: - * dpll4 -> dpll4_m5 -> dpll4_m5x2 -> cam_mclk - * - * In OMAP3630 dpll4_m5x2 != 2 x dpll4_m5 but both are - * set to the same value. Hence the rate set for dpll4_m5 - * has to be twice of what is set on OMAP3430 to get - * the required value for cam_mclk - */ - divisor = isp->revision == ISP_REVISION_15_0 ? 1 : 2; r = clk_prepare_enable(isp->clock[ISP_CLK_CAM_ICK]); if (r) { dev_err(isp->dev, "failed to enable cam_ick clock\n"); goto out_clk_enable_ick; } - r = clk_set_rate(isp->clock[ISP_CLK_DPLL4_M5_CK], - CM_CAM_MCLK_HZ/divisor); + r = clk_set_rate(isp->clock[ISP_CLK_CAM_MCLK], CM_CAM_MCLK_HZ); if (r) { - dev_err(isp->dev, "clk_set_rate for dpll4_m5_ck failed\n"); + dev_err(isp->dev, "clk_set_rate for cam_mclk failed\n"); goto out_clk_enable_mclk; } r = clk_prepare_enable(isp->clock[ISP_CLK_CAM_MCLK]); @@ -1401,7 +1388,6 @@ static void isp_disable_clocks(struct isp_device *isp) static const char *isp_clocks[] = { "cam_ick", "cam_mclk", - "dpll4_m5_ck", "csi2_96m_fck", "l3_ick", }; diff --git a/drivers/media/platform/omap3isp/isp.h b/drivers/media/platform/omap3isp/isp.h index 517d348ce32..c77e1f2ae5c 100644 --- a/drivers/media/platform/omap3isp/isp.h +++ b/drivers/media/platform/omap3isp/isp.h @@ -147,7 +147,6 @@ struct isp_platform_callback { * @ref_count: Reference count for handling multiple ISP requests. * @cam_ick: Pointer to camera interface clock structure. * @cam_mclk: Pointer to camera functional clock structure. - * @dpll4_m5_ck: Pointer to DPLL4 M5 clock structure. * @csi2_fck: Pointer to camera CSI2 complexIO clock structure. * @l3_ick: Pointer to OMAP3 L3 bus interface clock. * @irq: Currently attached ISP ISR callbacks information structure. @@ -189,10 +188,9 @@ struct isp_device { u32 xclk_divisor[2]; /* Two clocks, a and b. */ #define ISP_CLK_CAM_ICK 0 #define ISP_CLK_CAM_MCLK 1 -#define ISP_CLK_DPLL4_M5_CK 2 -#define ISP_CLK_CSI2_FCK 3 -#define ISP_CLK_L3_ICK 4 - struct clk *clock[5]; +#define ISP_CLK_CSI2_FCK 2 +#define ISP_CLK_L3_ICK 3 + struct clk *clock[4]; /* ISP modules */ struct ispstat isp_af; diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 742f5d7eb0f..a6f7190c09a 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -12,13 +12,13 @@ *----------------------------------------------------------------------------*/ #ifndef AAC_DRIVER_BUILD -# define AAC_DRIVER_BUILD 29801 +# define AAC_DRIVER_BUILD 30000 # define AAC_DRIVER_BRANCH "-ms" #endif #define MAXIMUM_NUM_CONTAINERS 32 #define AAC_NUM_MGT_FIB 8 -#define AAC_NUM_IO_FIB (512 - AAC_NUM_MGT_FIB) +#define AAC_NUM_IO_FIB (1024 - AAC_NUM_MGT_FIB) #define AAC_NUM_FIB (AAC_NUM_IO_FIB + AAC_NUM_MGT_FIB) #define AAC_MAX_LUN (8) @@ -36,6 +36,10 @@ #define CONTAINER_TO_ID(cont) (cont) #define CONTAINER_TO_LUN(cont) (0) +#define PMC_DEVICE_S7 0x28c +#define PMC_DEVICE_S8 0x28d +#define PMC_DEVICE_S9 0x28f + #define aac_phys_to_logical(x) ((x)+1) #define aac_logical_to_phys(x) ((x)?(x)-1:0) diff --git a/drivers/scsi/aacraid/comminit.c b/drivers/scsi/aacraid/comminit.c index 8e5d3be1612..3f759957f4b 100644 --- a/drivers/scsi/aacraid/comminit.c +++ b/drivers/scsi/aacraid/comminit.c @@ -404,7 +404,13 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) dev->max_fib_size = status[1] & 0xFFE0; host->sg_tablesize = status[2] >> 16; dev->sg_tablesize = status[2] & 0xFFFF; - host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB; + if (dev->pdev->device == PMC_DEVICE_S7 || + dev->pdev->device == PMC_DEVICE_S8 || + dev->pdev->device == PMC_DEVICE_S9) + host->can_queue = ((status[3] >> 16) ? (status[3] >> 16) : + (status[3] & 0xFFFF)) - AAC_NUM_MGT_FIB; + else + host->can_queue = (status[3] & 0xFFFF) - AAC_NUM_MGT_FIB; dev->max_num_aif = status[4] & 0xFFFF; /* * NOTE: @@ -452,6 +458,9 @@ struct aac_dev *aac_init_adapter(struct aac_dev *dev) } } + if (host->can_queue > AAC_NUM_IO_FIB) + host->can_queue = AAC_NUM_IO_FIB; + /* * Ok now init the communication subsystem */ diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index e6bf12675db..a5f7690e819 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -1034,7 +1034,7 @@ bfad_start_ops(struct bfad_s *bfad) { sizeof(driver_info.host_os_patch) - 1); strncpy(driver_info.os_device_name, bfad->pci_name, - sizeof(driver_info.os_device_name - 1)); + sizeof(driver_info.os_device_name) - 1); /* FCS driver info init */ spin_lock_irqsave(&bfad->bfad_lock, flags); diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index 3486845ba30..50fcd018d14 100644 --- a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -64,7 +64,7 @@ #include "bnx2fc_constants.h" #define BNX2FC_NAME "bnx2fc" -#define BNX2FC_VERSION "1.0.12" +#define BNX2FC_VERSION "1.0.13" #define PFX "bnx2fc: " @@ -156,6 +156,18 @@ #define BNX2FC_RELOGIN_WAIT_TIME 200 #define BNX2FC_RELOGIN_WAIT_CNT 10 +#define BNX2FC_STATS(hba, stat, cnt) \ + do { \ + u32 val; \ + \ + val = fw_stats->stat.cnt; \ + if (hba->prev_stats.stat.cnt <= val) \ + val -= hba->prev_stats.stat.cnt; \ + else \ + val += (0xfffffff - hba->prev_stats.stat.cnt); \ + hba->bfw_stats.cnt += val; \ + } while (0) + /* bnx2fc driver uses only one instance of fcoe_percpu_s */ extern struct fcoe_percpu_s bnx2fc_global; @@ -167,6 +179,14 @@ struct bnx2fc_percpu_s { spinlock_t fp_work_lock; }; +struct bnx2fc_fw_stats { + u64 fc_crc_cnt; + u64 fcoe_tx_pkt_cnt; + u64 fcoe_rx_pkt_cnt; + u64 fcoe_tx_byte_cnt; + u64 fcoe_rx_byte_cnt; +}; + struct bnx2fc_hba { struct list_head list; struct cnic_dev *cnic; @@ -207,6 +227,8 @@ struct bnx2fc_hba { struct bnx2fc_rport **tgt_ofld_list; /* statistics */ + struct bnx2fc_fw_stats bfw_stats; + struct fcoe_statistics_params prev_stats; struct fcoe_statistics_params *stats_buffer; dma_addr_t stats_buf_dma; struct completion stat_req_done; @@ -280,6 +302,7 @@ struct bnx2fc_rport { #define BNX2FC_FLAG_UPLD_REQ_COMPL 0x7 #define BNX2FC_FLAG_EXPL_LOGO 0x8 #define BNX2FC_FLAG_DISABLE_FAILED 0x9 +#define BNX2FC_FLAG_ENABLED 0xa u8 src_addr[ETH_ALEN]; u32 max_sqes; @@ -468,6 +491,8 @@ int bnx2fc_send_fw_fcoe_init_msg(struct bnx2fc_hba *hba); int bnx2fc_send_fw_fcoe_destroy_msg(struct bnx2fc_hba *hba); int bnx2fc_send_session_ofld_req(struct fcoe_port *port, struct bnx2fc_rport *tgt); +int bnx2fc_send_session_enable_req(struct fcoe_port *port, + struct bnx2fc_rport *tgt); int bnx2fc_send_session_disable_req(struct fcoe_port *port, struct bnx2fc_rport *tgt); int bnx2fc_send_session_destroy_req(struct bnx2fc_hba *hba, diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 70ecd953a57..6401db494ef 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -22,7 +22,7 @@ DEFINE_PER_CPU(struct bnx2fc_percpu_s, bnx2fc_percpu); #define DRV_MODULE_NAME "bnx2fc" #define DRV_MODULE_VERSION BNX2FC_VERSION -#define DRV_MODULE_RELDATE "Jun 04, 2012" +#define DRV_MODULE_RELDATE "Dec 21, 2012" static char version[] = @@ -687,11 +687,16 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) BNX2FC_HBA_DBG(lport, "FW stat req timed out\n"); return bnx2fc_stats; } - bnx2fc_stats->invalid_crc_count += fw_stats->rx_stat2.fc_crc_cnt; - bnx2fc_stats->tx_frames += fw_stats->tx_stat.fcoe_tx_pkt_cnt; - bnx2fc_stats->tx_words += (fw_stats->tx_stat.fcoe_tx_byte_cnt) / 4; - bnx2fc_stats->rx_frames += fw_stats->rx_stat0.fcoe_rx_pkt_cnt; - bnx2fc_stats->rx_words += (fw_stats->rx_stat0.fcoe_rx_byte_cnt) / 4; + BNX2FC_STATS(hba, rx_stat2, fc_crc_cnt); + bnx2fc_stats->invalid_crc_count += hba->bfw_stats.fc_crc_cnt; + BNX2FC_STATS(hba, tx_stat, fcoe_tx_pkt_cnt); + bnx2fc_stats->tx_frames += hba->bfw_stats.fcoe_tx_pkt_cnt; + BNX2FC_STATS(hba, tx_stat, fcoe_tx_byte_cnt); + bnx2fc_stats->tx_words += ((hba->bfw_stats.fcoe_tx_byte_cnt) / 4); + BNX2FC_STATS(hba, rx_stat0, fcoe_rx_pkt_cnt); + bnx2fc_stats->rx_frames += hba->bfw_stats.fcoe_rx_pkt_cnt; + BNX2FC_STATS(hba, rx_stat0, fcoe_rx_byte_cnt); + bnx2fc_stats->rx_words += ((hba->bfw_stats.fcoe_rx_byte_cnt) / 4); bnx2fc_stats->dumped_frames = 0; bnx2fc_stats->lip_count = 0; @@ -700,6 +705,8 @@ static struct fc_host_statistics *bnx2fc_get_host_stats(struct Scsi_Host *shost) bnx2fc_stats->loss_of_signal_count = 0; bnx2fc_stats->prim_seq_protocol_err_count = 0; + memcpy(&hba->prev_stats, hba->stats_buffer, + sizeof(struct fcoe_statistics_params)); return bnx2fc_stats; } @@ -2660,7 +2667,7 @@ static struct scsi_host_template bnx2fc_shost_template = { .can_queue = BNX2FC_CAN_QUEUE, .use_clustering = ENABLE_CLUSTERING, .sg_tablesize = BNX2FC_MAX_BDS_PER_CMD, - .max_sectors = 512, + .max_sectors = 1024, }; static struct libfc_function_template bnx2fc_libfc_fcn_templ = { diff --git a/drivers/scsi/bnx2fc/bnx2fc_hwi.c b/drivers/scsi/bnx2fc/bnx2fc_hwi.c index ef60afa94d0..85ea98a80f4 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_hwi.c +++ b/drivers/scsi/bnx2fc/bnx2fc_hwi.c @@ -347,7 +347,7 @@ int bnx2fc_send_session_ofld_req(struct fcoe_port *port, * @port: port structure pointer * @tgt: bnx2fc_rport structure pointer */ -static int bnx2fc_send_session_enable_req(struct fcoe_port *port, +int bnx2fc_send_session_enable_req(struct fcoe_port *port, struct bnx2fc_rport *tgt) { struct kwqe *kwqe_arr[2]; @@ -759,8 +759,6 @@ static void bnx2fc_process_unsol_compl(struct bnx2fc_rport *tgt, u16 wqe) case FCOE_ERROR_CODE_DATA_SOFN_SEQ_ACTIVE_RESET: BNX2FC_TGT_DBG(tgt, "REC TOV popped for xid - 0x%x\n", xid); - memset(&io_req->err_entry, 0, - sizeof(struct fcoe_err_report_entry)); memcpy(&io_req->err_entry, err_entry, sizeof(struct fcoe_err_report_entry)); if (!test_bit(BNX2FC_FLAG_SRR_SENT, @@ -847,8 +845,6 @@ ret_err_rqe: goto ret_warn_rqe; } - memset(&io_req->err_entry, 0, - sizeof(struct fcoe_err_report_entry)); memcpy(&io_req->err_entry, err_entry, sizeof(struct fcoe_err_report_entry)); @@ -1124,7 +1120,6 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba, struct bnx2fc_interface *interface; u32 conn_id; u32 context_id; - int rc; conn_id = ofld_kcqe->fcoe_conn_id; context_id = ofld_kcqe->fcoe_conn_context_id; @@ -1153,17 +1148,10 @@ static void bnx2fc_process_ofld_cmpl(struct bnx2fc_hba *hba, "resources\n"); set_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE, &tgt->flags); } - goto ofld_cmpl_err; } else { - - /* now enable the session */ - rc = bnx2fc_send_session_enable_req(port, tgt); - if (rc) { - printk(KERN_ERR PFX "enable session failed\n"); - goto ofld_cmpl_err; - } + /* FW offload request successfully completed */ + set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); } - return; ofld_cmpl_err: set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); wake_up_interruptible(&tgt->ofld_wait); @@ -1210,15 +1198,9 @@ static void bnx2fc_process_enable_conn_cmpl(struct bnx2fc_hba *hba, printk(KERN_ERR PFX "bnx2fc-enbl_cmpl: HBA mis-match\n"); goto enbl_cmpl_err; } - if (ofld_kcqe->completion_status) - goto enbl_cmpl_err; - else { + if (!ofld_kcqe->completion_status) /* enable successful - rport ready for issuing IOs */ - set_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); - set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); - wake_up_interruptible(&tgt->ofld_wait); - } - return; + set_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); enbl_cmpl_err: set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); @@ -1251,6 +1233,7 @@ static void bnx2fc_process_conn_disable_cmpl(struct bnx2fc_hba *hba, /* disable successful */ BNX2FC_TGT_DBG(tgt, "disable successful\n"); clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_DISABLED, &tgt->flags); set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags); wake_up_interruptible(&tgt->upld_wait); diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c index 8d4626c07a1..60798e829de 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_io.c +++ b/drivers/scsi/bnx2fc/bnx2fc_io.c @@ -654,7 +654,7 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) mp_req->mp_resp_bd = dma_alloc_coherent(&hba->pcidev->dev, sz, &mp_req->mp_resp_bd_dma, GFP_ATOMIC); - if (!mp_req->mp_req_bd) { + if (!mp_req->mp_resp_bd) { printk(KERN_ERR PFX "unable to alloc MP resp bd\n"); bnx2fc_free_mp_resc(io_req); return FAILED; @@ -685,8 +685,8 @@ int bnx2fc_init_mp_req(struct bnx2fc_cmd *io_req) static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) { struct fc_lport *lport; - struct fc_rport *rport = starget_to_rport(scsi_target(sc_cmd->device)); - struct fc_rport_libfc_priv *rp = rport->dd_data; + struct fc_rport *rport; + struct fc_rport_libfc_priv *rp; struct fcoe_port *port; struct bnx2fc_interface *interface; struct bnx2fc_rport *tgt; @@ -704,6 +704,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) unsigned long start = jiffies; lport = shost_priv(host); + rport = starget_to_rport(scsi_target(sc_cmd->device)); port = lport_priv(lport); interface = port->priv; @@ -712,6 +713,7 @@ static int bnx2fc_initiate_tmf(struct scsi_cmnd *sc_cmd, u8 tm_flags) rc = FAILED; goto tmf_err; } + rp = rport->dd_data; rc = fc_block_scsi_eh(sc_cmd); if (rc) diff --git a/drivers/scsi/bnx2fc/bnx2fc_tgt.c b/drivers/scsi/bnx2fc/bnx2fc_tgt.c index b9d0d9cb17f..c57a3bb8a9f 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_tgt.c +++ b/drivers/scsi/bnx2fc/bnx2fc_tgt.c @@ -33,6 +33,7 @@ static void bnx2fc_upld_timer(unsigned long data) BNX2FC_TGT_DBG(tgt, "upld_timer - Upload compl not received!!\n"); /* fake upload completion */ clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_UPLD_REQ_COMPL, &tgt->flags); wake_up_interruptible(&tgt->upld_wait); } @@ -55,10 +56,25 @@ static void bnx2fc_ofld_timer(unsigned long data) * resources are freed up in bnx2fc_offload_session */ clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); + clear_bit(BNX2FC_FLAG_ENABLED, &tgt->flags); set_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); wake_up_interruptible(&tgt->ofld_wait); } +static void bnx2fc_ofld_wait(struct bnx2fc_rport *tgt) +{ + setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt); + mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT); + + wait_event_interruptible(tgt->ofld_wait, + (test_bit( + BNX2FC_FLAG_OFLD_REQ_CMPL, + &tgt->flags))); + if (signal_pending(current)) + flush_signals(current); + del_timer_sync(&tgt->ofld_timer); +} + static void bnx2fc_offload_session(struct fcoe_port *port, struct bnx2fc_rport *tgt, struct fc_rport_priv *rdata) @@ -103,17 +119,7 @@ retry_ofld: * wait for the session is offloaded and enabled. 3 Secs * should be ample time for this process to complete. */ - setup_timer(&tgt->ofld_timer, bnx2fc_ofld_timer, (unsigned long)tgt); - mod_timer(&tgt->ofld_timer, jiffies + BNX2FC_FW_TIMEOUT); - - wait_event_interruptible(tgt->ofld_wait, - (test_bit( - BNX2FC_FLAG_OFLD_REQ_CMPL, - &tgt->flags))); - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->ofld_timer); + bnx2fc_ofld_wait(tgt); if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) { if (test_and_clear_bit(BNX2FC_FLAG_CTX_ALLOC_FAILURE, @@ -131,14 +137,23 @@ retry_ofld: } if (bnx2fc_map_doorbell(tgt)) { printk(KERN_ERR PFX "map doorbell failed - no mem\n"); - /* upload will take care of cleaning up sess resc */ - lport->tt.rport_logoff(rdata); + goto ofld_err; } + clear_bit(BNX2FC_FLAG_OFLD_REQ_CMPL, &tgt->flags); + rval = bnx2fc_send_session_enable_req(port, tgt); + if (rval) { + pr_err(PFX "enable session failed\n"); + goto ofld_err; + } + bnx2fc_ofld_wait(tgt); + if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags))) + goto ofld_err; return; ofld_err: /* couldn't offload the session. log off from this rport */ BNX2FC_TGT_DBG(tgt, "bnx2fc_offload_session - offload error\n"); + clear_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags); /* Free session resources */ bnx2fc_free_session_resc(hba, tgt); tgt_init_err: @@ -259,6 +274,19 @@ void bnx2fc_flush_active_ios(struct bnx2fc_rport *tgt) spin_unlock_bh(&tgt->tgt_lock); } +static void bnx2fc_upld_wait(struct bnx2fc_rport *tgt) +{ + setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt); + mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); + wait_event_interruptible(tgt->upld_wait, + (test_bit( + BNX2FC_FLAG_UPLD_REQ_COMPL, + &tgt->flags))); + if (signal_pending(current)) + flush_signals(current); + del_timer_sync(&tgt->upld_timer); +} + static void bnx2fc_upload_session(struct fcoe_port *port, struct bnx2fc_rport *tgt) { @@ -279,19 +307,8 @@ static void bnx2fc_upload_session(struct fcoe_port *port, * wait for upload to complete. 3 Secs * should be sufficient time for this process to complete. */ - setup_timer(&tgt->upld_timer, bnx2fc_upld_timer, (unsigned long)tgt); - mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); - BNX2FC_TGT_DBG(tgt, "waiting for disable compl\n"); - wait_event_interruptible(tgt->upld_wait, - (test_bit( - BNX2FC_FLAG_UPLD_REQ_COMPL, - &tgt->flags))); - - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->upld_timer); + bnx2fc_upld_wait(tgt); /* * traverse thru the active_q and tmf_q and cleanup @@ -308,24 +325,13 @@ static void bnx2fc_upload_session(struct fcoe_port *port, bnx2fc_send_session_destroy_req(hba, tgt); /* wait for destroy to complete */ - setup_timer(&tgt->upld_timer, - bnx2fc_upld_timer, (unsigned long)tgt); - mod_timer(&tgt->upld_timer, jiffies + BNX2FC_FW_TIMEOUT); - - wait_event_interruptible(tgt->upld_wait, - (test_bit( - BNX2FC_FLAG_UPLD_REQ_COMPL, - &tgt->flags))); + bnx2fc_upld_wait(tgt); if (!(test_bit(BNX2FC_FLAG_DESTROYED, &tgt->flags))) printk(KERN_ERR PFX "ERROR!! destroy timed out\n"); BNX2FC_TGT_DBG(tgt, "destroy wait complete flags = 0x%lx\n", tgt->flags); - if (signal_pending(current)) - flush_signals(current); - - del_timer_sync(&tgt->upld_timer); } else if (test_bit(BNX2FC_FLAG_DISABLE_FAILED, &tgt->flags)) { printk(KERN_ERR PFX "ERROR!! DISABLE req failed, destroy" @@ -381,7 +387,9 @@ static int bnx2fc_init_tgt(struct bnx2fc_rport *tgt, tgt->rq_cons_idx = 0; atomic_set(&tgt->num_active_ios, 0); - if (rdata->flags & FC_RP_FLAGS_RETRY) { + if (rdata->flags & FC_RP_FLAGS_RETRY && + rdata->ids.roles & FC_RPORT_ROLE_FCP_TARGET && + !(rdata->ids.roles & FC_RPORT_ROLE_FCP_INITIATOR)) { tgt->dev_type = TYPE_TAPE; tgt->io_timeout = 0; /* use default ULP timeout */ } else { @@ -479,7 +487,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, tgt = (struct bnx2fc_rport *)&rp[1]; /* This can happen when ADISC finds the same target */ - if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) { + if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) { BNX2FC_TGT_DBG(tgt, "already offloaded\n"); mutex_unlock(&hba->hba_mutex); return; @@ -494,11 +502,8 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, BNX2FC_TGT_DBG(tgt, "OFFLOAD num_ofld_sess = %d\n", hba->num_ofld_sess); - if (test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags)) { - /* - * Session is offloaded and enabled. Map - * doorbell register for this target - */ + if (test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags)) { + /* Session is offloaded and enabled. */ BNX2FC_TGT_DBG(tgt, "sess offloaded\n"); /* This counter is protected with hba mutex */ hba->num_ofld_sess++; @@ -535,7 +540,7 @@ void bnx2fc_rport_event_handler(struct fc_lport *lport, */ tgt = (struct bnx2fc_rport *)&rp[1]; - if (!(test_bit(BNX2FC_FLAG_OFFLOADED, &tgt->flags))) { + if (!(test_bit(BNX2FC_FLAG_ENABLED, &tgt->flags))) { mutex_unlock(&hba->hba_mutex); break; } diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 91eec60252e..a28b03e5a5f 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -1317,7 +1317,7 @@ int bnx2i_send_fw_iscsi_init_msg(struct bnx2i_hba *hba) (1ULL << ISCSI_KCQE_COMPLETION_STATUS_PROTOCOL_ERR_LUN)); if (error_mask1) { iscsi_init2.error_bit_map[0] = error_mask1; - mask64 &= (u32)(~mask64); + mask64 ^= (u32)(mask64); mask64 |= error_mask1; } else iscsi_init2.error_bit_map[0] = (u32) mask64; diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c index 8ecdb94a59f..bdd78fb4fc7 100644 --- a/drivers/scsi/csiostor/csio_hw.c +++ b/drivers/scsi/csiostor/csio_hw.c @@ -2131,13 +2131,16 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path) value_to_add = 4 - (cf->size % 4); cfg_data = kzalloc(cf->size+value_to_add, GFP_KERNEL); - if (cfg_data == NULL) - return -ENOMEM; + if (cfg_data == NULL) { + ret = -ENOMEM; + goto leave; + } memcpy((void *)cfg_data, (const void *)cf->data, cf->size); - - if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0) - return -EINVAL; + if (csio_hw_check_fwconfig(hw, fw_cfg_param) != 0) { + ret = -EINVAL; + goto leave; + } mtype = FW_PARAMS_PARAM_Y_GET(*fw_cfg_param); maddr = FW_PARAMS_PARAM_Z_GET(*fw_cfg_param) << 16; @@ -2149,9 +2152,9 @@ csio_hw_flash_config(struct csio_hw *hw, u32 *fw_cfg_param, char *path) strncpy(path, "/lib/firmware/" CSIO_CF_FNAME, 64); } +leave: kfree(cfg_data); release_firmware(cf); - return ret; } diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index c323b2030af..0604b5ff363 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -60,13 +60,6 @@ static struct scsi_transport_template *csio_fcoe_transport_vport; /* * debugfs support */ -static int -csio_mem_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static ssize_t csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -110,7 +103,7 @@ csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) static const struct file_operations csio_mem_debugfs_fops = { .owner = THIS_MODULE, - .open = csio_mem_open, + .open = simple_open, .read = csio_mem_read, .llseek = default_llseek, }; diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index f924b3c3720..3fecf35ba29 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -1564,6 +1564,7 @@ static int t4_uld_state_change(void *handle, enum cxgb4_state state) break; case CXGB4_STATE_DETACH: pr_info("cdev 0x%p, DETACH.\n", cdev); + cxgbi_device_unregister(cdev); break; default: pr_info("cdev 0x%p, unknown state %d.\n", cdev, state); diff --git a/drivers/scsi/fnic/fnic_fcs.c b/drivers/scsi/fnic/fnic_fcs.c index 3c53c3478ee..483eb9dbe66 100644 --- a/drivers/scsi/fnic/fnic_fcs.c +++ b/drivers/scsi/fnic/fnic_fcs.c @@ -495,7 +495,8 @@ void fnic_eth_send(struct fcoe_ctlr *fip, struct sk_buff *skb) } fnic_queue_wq_eth_desc(wq, skb, pa, skb->len, - fnic->vlan_hw_insert, fnic->vlan_id, 1); + 0 /* hw inserts cos value */, + fnic->vlan_id, 1); spin_unlock_irqrestore(&fnic->wq_lock[0], flags); } @@ -563,7 +564,8 @@ static int fnic_send_frame(struct fnic *fnic, struct fc_frame *fp) } fnic_queue_wq_desc(wq, skb, pa, tot_len, fr_eof(fp), - fnic->vlan_hw_insert, fnic->vlan_id, 1, 1, 1); + 0 /* hw inserts cos value */, + fnic->vlan_id, 1, 1, 1); fnic_send_frame_end: spin_unlock_irqrestore(&fnic->wq_lock[0], flags); diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c index 599790e41a9..59bceac51a4 100644 --- a/drivers/scsi/gdth.c +++ b/drivers/scsi/gdth.c @@ -1107,14 +1107,8 @@ static int gdth_init_pci(struct pci_dev *pdev, gdth_pci_str *pcistr, pci_read_config_word(pdev, PCI_COMMAND, &command); command |= 6; pci_write_config_word(pdev, PCI_COMMAND, command); - if (pci_resource_start(pdev, 8) == 1UL) - pci_resource_start(pdev, 8) = 0UL; - i = 0xFEFF0001UL; - pci_write_config_dword(pdev, PCI_ROM_ADDRESS, i); - gdth_delay(1); - pci_write_config_dword(pdev, PCI_ROM_ADDRESS, - pci_resource_start(pdev, 8)); - + gdth_delay(1); + dp6m_ptr = ha->brd; /* Ensure that it is safe to access the non HW portions of DPMEM. diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 1d7da3f41eb..8fa79b83f2d 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -98,6 +98,7 @@ static unsigned int ipr_transop_timeout = 0; static unsigned int ipr_debug = 0; static unsigned int ipr_max_devs = IPR_DEFAULT_SIS64_DEVS; static unsigned int ipr_dual_ioa_raid = 1; +static unsigned int ipr_number_of_msix = 2; static DEFINE_SPINLOCK(ipr_driver_lock); /* This table describes the differences between DMA controller chips */ @@ -107,6 +108,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 100, .cache_line_size = 0x20, .clear_isr = 1, + .iopoll_weight = 0, { .set_interrupt_mask_reg = 0x0022C, .clr_interrupt_mask_reg = 0x00230, @@ -131,6 +133,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 100, .cache_line_size = 0x20, .clear_isr = 1, + .iopoll_weight = 0, { .set_interrupt_mask_reg = 0x00288, .clr_interrupt_mask_reg = 0x0028C, @@ -155,6 +158,7 @@ static const struct ipr_chip_cfg_t ipr_chip_cfg[] = { .max_cmds = 1000, .cache_line_size = 0x20, .clear_isr = 0, + .iopoll_weight = 64, { .set_interrupt_mask_reg = 0x00010, .clr_interrupt_mask_reg = 0x00018, @@ -215,6 +219,8 @@ MODULE_PARM_DESC(dual_ioa_raid, "Enable dual adapter RAID support. Set to 1 to e module_param_named(max_devs, ipr_max_devs, int, 0); MODULE_PARM_DESC(max_devs, "Specify the maximum number of physical devices. " "[Default=" __stringify(IPR_DEFAULT_SIS64_DEVS) "]"); +module_param_named(number_of_msix, ipr_number_of_msix, int, 0); +MODULE_PARM_DESC(number_of_msix, "Specify the number of MSIX interrupts to use on capable adapters (1 - 5). (default:2)"); MODULE_LICENSE("GPL"); MODULE_VERSION(IPR_DRIVER_VERSION); @@ -549,7 +555,8 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, struct ipr_trace_entry *trace_entry; struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - trace_entry = &ioa_cfg->trace[ioa_cfg->trace_index++]; + trace_entry = &ioa_cfg->trace[atomic_add_return + (1, &ioa_cfg->trace_index)%IPR_NUM_TRACE_ENTRIES]; trace_entry->time = jiffies; trace_entry->op_code = ipr_cmd->ioarcb.cmd_pkt.cdb[0]; trace_entry->type = type; @@ -560,6 +567,7 @@ static void ipr_trc_hook(struct ipr_cmnd *ipr_cmd, trace_entry->cmd_index = ipr_cmd->cmd_index & 0xff; trace_entry->res_handle = ipr_cmd->ioarcb.res_handle; trace_entry->u.add_data = add_data; + wmb(); } #else #define ipr_trc_hook(ipr_cmd, type, add_data) do { } while (0) @@ -595,8 +603,11 @@ static void ipr_reinit_ipr_cmnd(struct ipr_cmnd *ipr_cmd) struct ipr_ioasa *ioasa = &ipr_cmd->s.ioasa; struct ipr_ioasa64 *ioasa64 = &ipr_cmd->s.ioasa64; dma_addr_t dma_addr = ipr_cmd->dma_addr; + int hrrq_id; + hrrq_id = ioarcb->cmd_pkt.hrrq_id; memset(&ioarcb->cmd_pkt, 0, sizeof(struct ipr_cmd_pkt)); + ioarcb->cmd_pkt.hrrq_id = hrrq_id; ioarcb->data_transfer_length = 0; ioarcb->read_data_transfer_length = 0; ioarcb->ioadl_len = 0; @@ -646,12 +657,16 @@ static void ipr_init_ipr_cmnd(struct ipr_cmnd *ipr_cmd, * pointer to ipr command struct **/ static -struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) +struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_hrr_queue *hrrq) { - struct ipr_cmnd *ipr_cmd; + struct ipr_cmnd *ipr_cmd = NULL; + + if (likely(!list_empty(&hrrq->hrrq_free_q))) { + ipr_cmd = list_entry(hrrq->hrrq_free_q.next, + struct ipr_cmnd, queue); + list_del(&ipr_cmd->queue); + } - ipr_cmd = list_entry(ioa_cfg->free_q.next, struct ipr_cmnd, queue); - list_del(&ipr_cmd->queue); return ipr_cmd; } @@ -666,7 +681,8 @@ struct ipr_cmnd *__ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) static struct ipr_cmnd *ipr_get_free_ipr_cmnd(struct ipr_ioa_cfg *ioa_cfg) { - struct ipr_cmnd *ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg); + struct ipr_cmnd *ipr_cmd = + __ipr_get_free_ipr_cmnd(&ioa_cfg->hrrq[IPR_INIT_HRRQ]); ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done); return ipr_cmd; } @@ -686,9 +702,15 @@ static void ipr_mask_and_clear_interrupts(struct ipr_ioa_cfg *ioa_cfg, u32 clr_ints) { volatile u32 int_reg; + int i; /* Stop new interrupts */ - ioa_cfg->allow_interrupts = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); /* Set interrupt mask to stop all new interrupts */ if (ioa_cfg->sis64) @@ -761,13 +783,12 @@ static int ipr_set_pcix_cmd_reg(struct ipr_ioa_cfg *ioa_cfg) **/ static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ata_queued_cmd *qc = ipr_cmd->qc; struct ipr_sata_port *sata_port = qc->ap->private_data; qc->err_mask |= AC_ERR_OTHER; sata_port->ioasa.status |= ATA_BUSY; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); ata_qc_complete(qc); } @@ -783,14 +804,13 @@ static void ipr_sata_eh_done(struct ipr_cmnd *ipr_cmd) **/ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; scsi_cmd->result |= (DID_ERROR << 16); scsi_dma_unmap(ipr_cmd->scsi_cmd); scsi_cmd->scsi_done(scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); } /** @@ -805,24 +825,32 @@ static void ipr_scsi_eh_done(struct ipr_cmnd *ipr_cmd) static void ipr_fail_all_ops(struct ipr_ioa_cfg *ioa_cfg) { struct ipr_cmnd *ipr_cmd, *temp; + struct ipr_hrr_queue *hrrq; ENTER; - list_for_each_entry_safe(ipr_cmd, temp, &ioa_cfg->pending_q, queue) { - list_del(&ipr_cmd->queue); + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry_safe(ipr_cmd, + temp, &hrrq->hrrq_pending_q, queue) { + list_del(&ipr_cmd->queue); - ipr_cmd->s.ioasa.hdr.ioasc = cpu_to_be32(IPR_IOASC_IOA_WAS_RESET); - ipr_cmd->s.ioasa.hdr.ilid = cpu_to_be32(IPR_DRIVER_ILID); + ipr_cmd->s.ioasa.hdr.ioasc = + cpu_to_be32(IPR_IOASC_IOA_WAS_RESET); + ipr_cmd->s.ioasa.hdr.ilid = + cpu_to_be32(IPR_DRIVER_ILID); - if (ipr_cmd->scsi_cmd) - ipr_cmd->done = ipr_scsi_eh_done; - else if (ipr_cmd->qc) - ipr_cmd->done = ipr_sata_eh_done; + if (ipr_cmd->scsi_cmd) + ipr_cmd->done = ipr_scsi_eh_done; + else if (ipr_cmd->qc) + ipr_cmd->done = ipr_sata_eh_done; - ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, IPR_IOASC_IOA_WAS_RESET); - del_timer(&ipr_cmd->timer); - ipr_cmd->done(ipr_cmd); + ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, + IPR_IOASC_IOA_WAS_RESET); + del_timer(&ipr_cmd->timer); + ipr_cmd->done(ipr_cmd); + } + spin_unlock(&hrrq->_lock); } - LEAVE; } @@ -872,9 +900,7 @@ static void ipr_do_req(struct ipr_cmnd *ipr_cmd, void (*done) (struct ipr_cmnd *), void (*timeout_func) (struct ipr_cmnd *), u32 timeout) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = done; @@ -975,6 +1001,14 @@ static void ipr_send_blocking_cmd(struct ipr_cmnd *ipr_cmd, spin_lock_irq(ioa_cfg->host->host_lock); } +static int ipr_get_hrrq_index(struct ipr_ioa_cfg *ioa_cfg) +{ + if (ioa_cfg->hrrq_num == 1) + return 0; + else + return (atomic_add_return(1, &ioa_cfg->hrrq_index) % (ioa_cfg->hrrq_num - 1)) + 1; +} + /** * ipr_send_hcam - Send an HCAM to the adapter. * @ioa_cfg: ioa config struct @@ -994,9 +1028,9 @@ static void ipr_send_hcam(struct ipr_ioa_cfg *ioa_cfg, u8 type, struct ipr_cmnd *ipr_cmd; struct ipr_ioarcb *ioarcb; - if (ioa_cfg->allow_cmds) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); list_add_tail(&hostrcb->queue, &ioa_cfg->hostrcb_pending_q); ipr_cmd->u.hostrcb = hostrcb; @@ -1166,14 +1200,15 @@ static int ipr_is_same_device(struct ipr_resource_entry *res, } /** - * ipr_format_res_path - Format the resource path for printing. + * __ipr_format_res_path - Format the resource path for printing. * @res_path: resource path * @buf: buffer + * @len: length of buffer provided * * Return value: * pointer to buffer **/ -static char *ipr_format_res_path(u8 *res_path, char *buffer, int len) +static char *__ipr_format_res_path(u8 *res_path, char *buffer, int len) { int i; char *p = buffer; @@ -1187,6 +1222,27 @@ static char *ipr_format_res_path(u8 *res_path, char *buffer, int len) } /** + * ipr_format_res_path - Format the resource path for printing. + * @ioa_cfg: ioa config struct + * @res_path: resource path + * @buf: buffer + * @len: length of buffer provided + * + * Return value: + * pointer to buffer + **/ +static char *ipr_format_res_path(struct ipr_ioa_cfg *ioa_cfg, + u8 *res_path, char *buffer, int len) +{ + char *p = buffer; + + *p = '\0'; + p += snprintf(p, buffer + len - p, "%d/", ioa_cfg->host->host_no); + __ipr_format_res_path(res_path, p, len - (buffer - p)); + return buffer; +} + +/** * ipr_update_res_entry - Update the resource entry. * @res: resource entry struct * @cfgtew: config table entry wrapper struct @@ -1226,8 +1282,8 @@ static void ipr_update_res_entry(struct ipr_resource_entry *res, if (res->sdev && new_path) sdev_printk(KERN_INFO, res->sdev, "Resource path: %s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(res->ioa_cfg, + res->res_path, buffer, sizeof(buffer))); } else { res->flags = cfgtew->u.cfgte->flags; if (res->flags & IPR_IS_IOA_RESOURCE) @@ -1363,7 +1419,7 @@ static void ipr_process_ccn(struct ipr_cmnd *ipr_cmd) u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); list_del(&hostrcb->queue); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (ioasc) { if (ioasc != IPR_IOASC_IOA_WAS_RESET) @@ -1613,8 +1669,8 @@ static void ipr_log_sis64_config_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err_separator; ipr_err("Device %d : %s", i + 1, - ipr_format_res_path(dev_entry->res_path, buffer, - sizeof(buffer))); + __ipr_format_res_path(dev_entry->res_path, + buffer, sizeof(buffer))); ipr_log_ext_vpd(&dev_entry->vpd); ipr_err("-----New Device Information-----\n"); @@ -1960,14 +2016,16 @@ static void ipr_log64_fabric_path(struct ipr_hostrcb *hostrcb, ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s\n", path_active_desc[i].desc, path_state_desc[j].desc, - ipr_format_res_path(fabric->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(hostrcb->ioa_cfg, + fabric->res_path, + buffer, sizeof(buffer))); return; } } ipr_err("Path state=%02X Resource Path=%s\n", path_state, - ipr_format_res_path(fabric->res_path, buffer, sizeof(buffer))); + ipr_format_res_path(hostrcb->ioa_cfg, fabric->res_path, + buffer, sizeof(buffer))); } static const struct { @@ -2108,18 +2166,20 @@ static void ipr_log64_path_elem(struct ipr_hostrcb *hostrcb, ipr_hcam_err(hostrcb, "%s %s: Resource Path=%s, Link rate=%s, WWN=%08X%08X\n", path_status_desc[j].desc, path_type_desc[i].desc, - ipr_format_res_path(cfg->res_path, buffer, - sizeof(buffer)), - link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], - be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); + ipr_format_res_path(hostrcb->ioa_cfg, + cfg->res_path, buffer, sizeof(buffer)), + link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], + be32_to_cpu(cfg->wwid[0]), + be32_to_cpu(cfg->wwid[1])); return; } } ipr_hcam_err(hostrcb, "Path element=%02X: Resource Path=%s, Link rate=%s " "WWN=%08X%08X\n", cfg->type_status, - ipr_format_res_path(cfg->res_path, buffer, sizeof(buffer)), - link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], - be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); + ipr_format_res_path(hostrcb->ioa_cfg, + cfg->res_path, buffer, sizeof(buffer)), + link_rate[cfg->link_rate & IPR_PHY_LINK_RATE_MASK], + be32_to_cpu(cfg->wwid[0]), be32_to_cpu(cfg->wwid[1])); } /** @@ -2182,7 +2242,8 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err("RAID %s Array Configuration: %s\n", error->protection_level, - ipr_format_res_path(error->last_res_path, buffer, sizeof(buffer))); + ipr_format_res_path(ioa_cfg, error->last_res_path, + buffer, sizeof(buffer))); ipr_err_separator; @@ -2203,11 +2264,12 @@ static void ipr_log_sis64_array_error(struct ipr_ioa_cfg *ioa_cfg, ipr_err("Array Member %d:\n", i); ipr_log_ext_vpd(&array_entry->vpd); ipr_err("Current Location: %s\n", - ipr_format_res_path(array_entry->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(ioa_cfg, array_entry->res_path, + buffer, sizeof(buffer))); ipr_err("Expected Location: %s\n", - ipr_format_res_path(array_entry->expected_res_path, - buffer, sizeof(buffer))); + ipr_format_res_path(ioa_cfg, + array_entry->expected_res_path, + buffer, sizeof(buffer))); ipr_err_separator; } @@ -2409,7 +2471,7 @@ static void ipr_process_error(struct ipr_cmnd *ipr_cmd) fd_ioasc = be32_to_cpu(hostrcb->hcam.u.error.fd_ioasc); list_del(&hostrcb->queue); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (!ioasc) { ipr_handle_log_data(ioa_cfg, hostrcb); @@ -2491,36 +2553,6 @@ static void ipr_oper_timeout(struct ipr_cmnd *ipr_cmd) } /** - * ipr_reset_reload - Reset/Reload the IOA - * @ioa_cfg: ioa config struct - * @shutdown_type: shutdown type - * - * This function resets the adapter and re-initializes it. - * This function assumes that all new host commands have been stopped. - * Return value: - * SUCCESS / FAILED - **/ -static int ipr_reset_reload(struct ipr_ioa_cfg *ioa_cfg, - enum ipr_shutdown_type shutdown_type) -{ - if (!ioa_cfg->in_reset_reload) - ipr_initiate_ioa_reset(ioa_cfg, shutdown_type); - - spin_unlock_irq(ioa_cfg->host->host_lock); - wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); - spin_lock_irq(ioa_cfg->host->host_lock); - - /* If we got hit with a host reset while we were already resetting - the adapter for some reason, and the reset failed. */ - if (ioa_cfg->ioa_is_dead) { - ipr_trace; - return FAILED; - } - - return SUCCESS; -} - -/** * ipr_find_ses_entry - Find matching SES in SES table * @res: resource entry struct of SES * @@ -3153,7 +3185,8 @@ static void ipr_worker_thread(struct work_struct *work) restart: do { did_work = 0; - if (!ioa_cfg->allow_cmds || !ioa_cfg->allow_ml_add_del) { + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds || + !ioa_cfg->allow_ml_add_del) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); return; } @@ -3401,7 +3434,7 @@ static ssize_t ipr_show_adapter_state(struct device *dev, int len; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->ioa_is_dead) + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) len = snprintf(buf, PAGE_SIZE, "offline\n"); else len = snprintf(buf, PAGE_SIZE, "online\n"); @@ -3427,14 +3460,20 @@ static ssize_t ipr_store_adapter_state(struct device *dev, struct Scsi_Host *shost = class_to_shost(dev); struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; unsigned long lock_flags; - int result = count; + int result = count, i; if (!capable(CAP_SYS_ADMIN)) return -EACCES; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - if (ioa_cfg->ioa_is_dead && !strncmp(buf, "online", 6)) { - ioa_cfg->ioa_is_dead = 0; + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead && + !strncmp(buf, "online", 6)) { + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].ioa_is_dead = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); ioa_cfg->reset_retries = 0; ioa_cfg->in_ioa_bringdown = 0; ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); @@ -3494,6 +3533,95 @@ static struct device_attribute ipr_ioa_reset_attr = { .store = ipr_store_reset_adapter }; +static int ipr_iopoll(struct blk_iopoll *iop, int budget); + /** + * ipr_show_iopoll_weight - Show ipr polling mode + * @dev: class device struct + * @buf: buffer + * + * Return value: + * number of bytes printed to buffer + **/ +static ssize_t ipr_show_iopoll_weight(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; + unsigned long lock_flags = 0; + int len; + + spin_lock_irqsave(shost->host_lock, lock_flags); + len = snprintf(buf, PAGE_SIZE, "%d\n", ioa_cfg->iopoll_weight); + spin_unlock_irqrestore(shost->host_lock, lock_flags); + + return len; +} + +/** + * ipr_store_iopoll_weight - Change the adapter's polling mode + * @dev: class device struct + * @buf: buffer + * + * Return value: + * number of bytes printed to buffer + **/ +static ssize_t ipr_store_iopoll_weight(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct Scsi_Host *shost = class_to_shost(dev); + struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; + unsigned long user_iopoll_weight; + unsigned long lock_flags = 0; + int i; + + if (!ioa_cfg->sis64) { + dev_info(&ioa_cfg->pdev->dev, "blk-iopoll not supported on this adapter\n"); + return -EINVAL; + } + if (kstrtoul(buf, 10, &user_iopoll_weight)) + return -EINVAL; + + if (user_iopoll_weight > 256) { + dev_info(&ioa_cfg->pdev->dev, "Invalid blk-iopoll weight. It must be less than 256\n"); + return -EINVAL; + } + + if (user_iopoll_weight == ioa_cfg->iopoll_weight) { + dev_info(&ioa_cfg->pdev->dev, "Current blk-iopoll weight has the same weight\n"); + return strlen(buf); + } + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) + blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + } + + spin_lock_irqsave(shost->host_lock, lock_flags); + ioa_cfg->iopoll_weight = user_iopoll_weight; + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) { + blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + ioa_cfg->iopoll_weight, ipr_iopoll); + blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + } + } + spin_unlock_irqrestore(shost->host_lock, lock_flags); + + return strlen(buf); +} + +static struct device_attribute ipr_iopoll_weight_attr = { + .attr = { + .name = "iopoll_weight", + .mode = S_IRUGO | S_IWUSR, + }, + .show = ipr_show_iopoll_weight, + .store = ipr_store_iopoll_weight +}; + /** * ipr_alloc_ucode_buffer - Allocates a microcode download buffer * @buf_len: buffer length @@ -3862,6 +3990,7 @@ static struct device_attribute *ipr_ioa_attrs[] = { &ipr_ioa_reset_attr, &ipr_update_fw_attr, &ipr_ioa_fw_type_attr, + &ipr_iopoll_weight_attr, NULL, }; @@ -4014,7 +4143,7 @@ static int ipr_alloc_dump(struct ipr_ioa_cfg *ioa_cfg) ioa_cfg->dump = dump; ioa_cfg->sdt_state = WAIT_FOR_DUMP; - if (ioa_cfg->ioa_is_dead && !ioa_cfg->dump_taken) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead && !ioa_cfg->dump_taken) { ioa_cfg->dump_taken = 1; schedule_work(&ioa_cfg->work_q); } @@ -4227,8 +4356,8 @@ static ssize_t ipr_show_resource_path(struct device *dev, struct device_attribut res = (struct ipr_resource_entry *)sdev->hostdata; if (res && ioa_cfg->sis64) len = snprintf(buf, PAGE_SIZE, "%s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + __ipr_format_res_path(res->res_path, buffer, + sizeof(buffer))); else if (res) len = snprintf(buf, PAGE_SIZE, "%d:%d:%d:%d\n", ioa_cfg->host->host_no, res->bus, res->target, res->lun); @@ -4556,8 +4685,8 @@ static int ipr_slave_configure(struct scsi_device *sdev) scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun); if (ioa_cfg->sis64) sdev_printk(KERN_INFO, sdev, "Resource path: %s\n", - ipr_format_res_path(res->res_path, buffer, - sizeof(buffer))); + ipr_format_res_path(ioa_cfg, + res->res_path, buffer, sizeof(buffer))); return 0; } spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); @@ -4638,22 +4767,18 @@ static int ipr_slave_alloc(struct scsi_device *sdev) return rc; } -/** - * ipr_eh_host_reset - Reset the host adapter - * @scsi_cmd: scsi command struct - * - * Return value: - * SUCCESS / FAILED - **/ -static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd) +static int ipr_eh_host_reset(struct scsi_cmnd *cmd) { struct ipr_ioa_cfg *ioa_cfg; - int rc; + unsigned long lock_flags = 0; + int rc = SUCCESS; ENTER; - ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; + ioa_cfg = (struct ipr_ioa_cfg *) cmd->device->host->hostdata; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); if (!ioa_cfg->in_reset_reload) { + ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_ABBREV); dev_err(&ioa_cfg->pdev->dev, "Adapter being reset as a result of error recovery.\n"); @@ -4661,20 +4786,19 @@ static int __ipr_eh_host_reset(struct scsi_cmnd *scsi_cmd) ioa_cfg->sdt_state = GET_DUMP; } - rc = ipr_reset_reload(ioa_cfg, IPR_SHUTDOWN_ABBREV); - - LEAVE; - return rc; -} - -static int ipr_eh_host_reset(struct scsi_cmnd *cmd) -{ - int rc; + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - spin_lock_irq(cmd->device->host->host_lock); - rc = __ipr_eh_host_reset(cmd); - spin_unlock_irq(cmd->device->host->host_lock); + /* If we got hit with a host reset while we were already resetting + the adapter for some reason, and the reset failed. */ + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { + ipr_trace; + rc = FAILED; + } + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + LEAVE; return rc; } @@ -4723,7 +4847,7 @@ static int ipr_device_reset(struct ipr_ioa_cfg *ioa_cfg, ipr_send_blocking_cmd(ipr_cmd, ipr_timeout, IPR_DEVICE_RESET_TIMEOUT); ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); if (ipr_is_gata(res) && res->sata_port && ioasc != IPR_IOASC_IOA_WAS_RESET) { if (ipr_cmd->ioa_cfg->sis64) memcpy(&res->sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata, @@ -4793,6 +4917,7 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) struct ipr_resource_entry *res; struct ata_port *ap; int rc = 0; + struct ipr_hrr_queue *hrrq; ENTER; ioa_cfg = (struct ipr_ioa_cfg *) scsi_cmd->device->host->hostdata; @@ -4808,22 +4933,26 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) */ if (ioa_cfg->in_reset_reload) return FAILED; - if (ioa_cfg->ioa_is_dead) + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return FAILED; - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->ioarcb.res_handle == res->res_handle) { - if (ipr_cmd->scsi_cmd) - ipr_cmd->done = ipr_scsi_eh_done; - if (ipr_cmd->qc) - ipr_cmd->done = ipr_sata_eh_done; - if (ipr_cmd->qc && !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) { - ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT; - ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->ioarcb.res_handle == res->res_handle) { + if (ipr_cmd->scsi_cmd) + ipr_cmd->done = ipr_scsi_eh_done; + if (ipr_cmd->qc) + ipr_cmd->done = ipr_sata_eh_done; + if (ipr_cmd->qc && + !(ipr_cmd->qc->flags & ATA_QCFLAG_FAILED)) { + ipr_cmd->qc->err_mask |= AC_ERR_TIMEOUT; + ipr_cmd->qc->flags |= ATA_QCFLAG_FAILED; + } } } + spin_unlock(&hrrq->_lock); } - res->resetting_device = 1; scmd_printk(KERN_ERR, scsi_cmd, "Resetting device\n"); @@ -4833,11 +4962,17 @@ static int __ipr_eh_dev_reset(struct scsi_cmnd *scsi_cmd) ata_std_error_handler(ap); spin_lock_irq(scsi_cmd->device->host->host_lock); - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->ioarcb.res_handle == res->res_handle) { - rc = -EIO; - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, + &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->ioarcb.res_handle == + res->res_handle) { + rc = -EIO; + break; + } } + spin_unlock(&hrrq->_lock); } } else rc = ipr_device_reset(ioa_cfg, res); @@ -4890,7 +5025,7 @@ static void ipr_bus_reset_done(struct ipr_cmnd *ipr_cmd) else ipr_cmd->sibling->done(ipr_cmd->sibling); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); LEAVE; } @@ -4951,6 +5086,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) struct ipr_cmd_pkt *cmd_pkt; u32 ioasc, int_reg; int op_found = 0; + struct ipr_hrr_queue *hrrq; ENTER; ioa_cfg = (struct ipr_ioa_cfg *)scsi_cmd->device->host->hostdata; @@ -4960,7 +5096,8 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) * This will force the mid-layer to call ipr_eh_host_reset, * which will then go to sleep and wait for the reset to complete */ - if (ioa_cfg->in_reset_reload || ioa_cfg->ioa_is_dead) + if (ioa_cfg->in_reset_reload || + ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return FAILED; if (!res) return FAILED; @@ -4975,12 +5112,16 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) if (!ipr_is_gscsi(res)) return FAILED; - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->scsi_cmd == scsi_cmd) { - ipr_cmd->done = ipr_scsi_eh_done; - op_found = 1; - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->scsi_cmd == scsi_cmd) { + ipr_cmd->done = ipr_scsi_eh_done; + op_found = 1; + break; + } } + spin_unlock(&hrrq->_lock); } if (!op_found) @@ -5007,7 +5148,7 @@ static int ipr_cancel_op(struct scsi_cmnd *scsi_cmd) ipr_trace; } - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); if (!ipr_is_naca_model(res)) res->needs_sync_complete = 1; @@ -5099,6 +5240,9 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, } else { if (int_reg & IPR_PCII_IOA_UNIT_CHECKED) ioa_cfg->ioa_unit_checked = 1; + else if (int_reg & IPR_PCII_NO_HOST_RRQ) + dev_err(&ioa_cfg->pdev->dev, + "No Host RRQ. 0x%08X\n", int_reg); else dev_err(&ioa_cfg->pdev->dev, "Permanent IOA failure. 0x%08X\n", int_reg); @@ -5121,10 +5265,10 @@ static irqreturn_t ipr_handle_other_interrupt(struct ipr_ioa_cfg *ioa_cfg, * Return value: * none **/ -static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) +static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg, u16 number) { ioa_cfg->errors_logged++; - dev_err(&ioa_cfg->pdev->dev, "%s\n", msg); + dev_err(&ioa_cfg->pdev->dev, "%s %d\n", msg, number); if (WAIT_FOR_DUMP == ioa_cfg->sdt_state) ioa_cfg->sdt_state = GET_DUMP; @@ -5132,6 +5276,83 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); } +static int ipr_process_hrrq(struct ipr_hrr_queue *hrr_queue, int budget, + struct list_head *doneq) +{ + u32 ioasc; + u16 cmd_index; + struct ipr_cmnd *ipr_cmd; + struct ipr_ioa_cfg *ioa_cfg = hrr_queue->ioa_cfg; + int num_hrrq = 0; + + /* If interrupts are disabled, ignore the interrupt */ + if (!hrr_queue->allow_interrupts) + return 0; + + while ((be32_to_cpu(*hrr_queue->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrr_queue->toggle_bit) { + + cmd_index = (be32_to_cpu(*hrr_queue->hrrq_curr) & + IPR_HRRQ_REQ_RESP_HANDLE_MASK) >> + IPR_HRRQ_REQ_RESP_HANDLE_SHIFT; + + if (unlikely(cmd_index > hrr_queue->max_cmd_id || + cmd_index < hrr_queue->min_cmd_id)) { + ipr_isr_eh(ioa_cfg, + "Invalid response handle from IOA: ", + cmd_index); + break; + } + + ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index]; + ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + + ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc); + + list_move_tail(&ipr_cmd->queue, doneq); + + if (hrr_queue->hrrq_curr < hrr_queue->hrrq_end) { + hrr_queue->hrrq_curr++; + } else { + hrr_queue->hrrq_curr = hrr_queue->hrrq_start; + hrr_queue->toggle_bit ^= 1u; + } + num_hrrq++; + if (budget > 0 && num_hrrq >= budget) + break; + } + + return num_hrrq; +} + +static int ipr_iopoll(struct blk_iopoll *iop, int budget) +{ + struct ipr_ioa_cfg *ioa_cfg; + struct ipr_hrr_queue *hrrq; + struct ipr_cmnd *ipr_cmd, *temp; + unsigned long hrrq_flags; + int completed_ops; + LIST_HEAD(doneq); + + hrrq = container_of(iop, struct ipr_hrr_queue, iopoll); + ioa_cfg = hrrq->ioa_cfg; + + spin_lock_irqsave(hrrq->lock, hrrq_flags); + completed_ops = ipr_process_hrrq(hrrq, budget, &doneq); + + if (completed_ops < budget) + blk_iopoll_complete(iop); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + + list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { + list_del(&ipr_cmd->queue); + del_timer(&ipr_cmd->timer); + ipr_cmd->fast_done(ipr_cmd); + } + + return completed_ops; +} + /** * ipr_isr - Interrupt service routine * @irq: irq number @@ -5142,78 +5363,48 @@ static void ipr_isr_eh(struct ipr_ioa_cfg *ioa_cfg, char *msg) **/ static irqreturn_t ipr_isr(int irq, void *devp) { - struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)devp; - unsigned long lock_flags = 0; + struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp; + struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg; + unsigned long hrrq_flags = 0; u32 int_reg = 0; - u32 ioasc; - u16 cmd_index; int num_hrrq = 0; int irq_none = 0; struct ipr_cmnd *ipr_cmd, *temp; irqreturn_t rc = IRQ_NONE; LIST_HEAD(doneq); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - + spin_lock_irqsave(hrrq->lock, hrrq_flags); /* If interrupts are disabled, ignore the interrupt */ - if (!ioa_cfg->allow_interrupts) { - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + if (!hrrq->allow_interrupts) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return IRQ_NONE; } while (1) { - ipr_cmd = NULL; - - while ((be32_to_cpu(*ioa_cfg->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == - ioa_cfg->toggle_bit) { - - cmd_index = (be32_to_cpu(*ioa_cfg->hrrq_curr) & - IPR_HRRQ_REQ_RESP_HANDLE_MASK) >> IPR_HRRQ_REQ_RESP_HANDLE_SHIFT; - - if (unlikely(cmd_index >= IPR_NUM_CMD_BLKS)) { - ipr_isr_eh(ioa_cfg, "Invalid response handle from IOA"); - rc = IRQ_HANDLED; - goto unlock_out; - } - - ipr_cmd = ioa_cfg->ipr_cmnd_list[cmd_index]; - - ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + if (ipr_process_hrrq(hrrq, -1, &doneq)) { + rc = IRQ_HANDLED; - ipr_trc_hook(ipr_cmd, IPR_TRACE_FINISH, ioasc); - - list_move_tail(&ipr_cmd->queue, &doneq); - - rc = IRQ_HANDLED; - - if (ioa_cfg->hrrq_curr < ioa_cfg->hrrq_end) { - ioa_cfg->hrrq_curr++; - } else { - ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start; - ioa_cfg->toggle_bit ^= 1u; - } - } - - if (ipr_cmd && !ioa_cfg->clear_isr) - break; + if (!ioa_cfg->clear_isr) + break; - if (ipr_cmd != NULL) { /* Clear the PCI interrupt */ num_hrrq = 0; do { - writel(IPR_PCII_HRRQ_UPDATED, ioa_cfg->regs.clr_interrupt_reg32); + writel(IPR_PCII_HRRQ_UPDATED, + ioa_cfg->regs.clr_interrupt_reg32); int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32); } while (int_reg & IPR_PCII_HRRQ_UPDATED && - num_hrrq++ < IPR_MAX_HRRQ_RETRIES); + num_hrrq++ < IPR_MAX_HRRQ_RETRIES); } else if (rc == IRQ_NONE && irq_none == 0) { int_reg = readl(ioa_cfg->regs.sense_interrupt_reg32); irq_none++; } else if (num_hrrq == IPR_MAX_HRRQ_RETRIES && int_reg & IPR_PCII_HRRQ_UPDATED) { - ipr_isr_eh(ioa_cfg, "Error clearing HRRQ"); + ipr_isr_eh(ioa_cfg, + "Error clearing HRRQ: ", num_hrrq); rc = IRQ_HANDLED; - goto unlock_out; + break; } else break; } @@ -5221,14 +5412,64 @@ static irqreturn_t ipr_isr(int irq, void *devp) if (unlikely(rc == IRQ_NONE)) rc = ipr_handle_other_interrupt(ioa_cfg, int_reg); -unlock_out: - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { list_del(&ipr_cmd->queue); del_timer(&ipr_cmd->timer); ipr_cmd->fast_done(ipr_cmd); } + return rc; +} + +/** + * ipr_isr_mhrrq - Interrupt service routine + * @irq: irq number + * @devp: pointer to ioa config struct + * + * Return value: + * IRQ_NONE / IRQ_HANDLED + **/ +static irqreturn_t ipr_isr_mhrrq(int irq, void *devp) +{ + struct ipr_hrr_queue *hrrq = (struct ipr_hrr_queue *)devp; + struct ipr_ioa_cfg *ioa_cfg = hrrq->ioa_cfg; + unsigned long hrrq_flags = 0; + struct ipr_cmnd *ipr_cmd, *temp; + irqreturn_t rc = IRQ_NONE; + LIST_HEAD(doneq); + + spin_lock_irqsave(hrrq->lock, hrrq_flags); + + /* If interrupts are disabled, ignore the interrupt */ + if (!hrrq->allow_interrupts) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return IRQ_NONE; + } + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrrq->toggle_bit) { + if (!blk_iopoll_sched_prep(&hrrq->iopoll)) + blk_iopoll_sched(&hrrq->iopoll); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return IRQ_HANDLED; + } + } else { + if ((be32_to_cpu(*hrrq->hrrq_curr) & IPR_HRRQ_TOGGLE_BIT) == + hrrq->toggle_bit) + + if (ipr_process_hrrq(hrrq, -1, &doneq)) + rc = IRQ_HANDLED; + } + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + + list_for_each_entry_safe(ipr_cmd, temp, &doneq, queue) { + list_del(&ipr_cmd->queue); + del_timer(&ipr_cmd->timer); + ipr_cmd->fast_done(ipr_cmd); + } return rc; } @@ -5388,7 +5629,6 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd) { struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; struct ipr_resource_entry *res = scsi_cmd->device->hostdata; - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); if (IPR_IOASC_SENSE_KEY(ioasc) > 0) { @@ -5406,7 +5646,7 @@ static void ipr_erp_done(struct ipr_cmnd *ipr_cmd) res->in_erp = 0; } scsi_dma_unmap(ipr_cmd->scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); } @@ -5790,7 +6030,7 @@ static void ipr_erp_start(struct ipr_ioa_cfg *ioa_cfg, } scsi_dma_unmap(ipr_cmd->scsi_cmd); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); } @@ -5809,21 +6049,21 @@ static void ipr_scsi_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct scsi_cmnd *scsi_cmd = ipr_cmd->scsi_cmd; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); - unsigned long lock_flags; + unsigned long hrrq_flags; scsi_set_resid(scsi_cmd, be32_to_cpu(ipr_cmd->s.ioasa.hdr.residual_data_len)); if (likely(IPR_IOASC_SENSE_KEY(ioasc) == 0)) { scsi_dma_unmap(scsi_cmd); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); } else { - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + spin_lock_irqsave(ipr_cmd->hrrq->lock, hrrq_flags); ipr_erp_start(ioa_cfg, ipr_cmd); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + spin_unlock_irqrestore(ipr_cmd->hrrq->lock, hrrq_flags); } } @@ -5846,22 +6086,34 @@ static int ipr_queuecommand(struct Scsi_Host *shost, struct ipr_resource_entry *res; struct ipr_ioarcb *ioarcb; struct ipr_cmnd *ipr_cmd; - unsigned long lock_flags; + unsigned long hrrq_flags, lock_flags; int rc; + struct ipr_hrr_queue *hrrq; + int hrrq_id; ioa_cfg = (struct ipr_ioa_cfg *)shost->hostdata; - spin_lock_irqsave(shost->host_lock, lock_flags); scsi_cmd->result = (DID_OK << 16); res = scsi_cmd->device->hostdata; + if (ipr_is_gata(res) && res->sata_port) { + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + return rc; + } + + hrrq_id = ipr_get_hrrq_index(ioa_cfg); + hrrq = &ioa_cfg->hrrq[hrrq_id]; + + spin_lock_irqsave(hrrq->lock, hrrq_flags); /* * We are currently blocking all devices due to a host reset * We have told the host to stop giving us new requests, but * ERP ops don't count. FIXME */ - if (unlikely(!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead)) { - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(!hrrq->allow_cmds && !hrrq->ioa_is_dead)) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return SCSI_MLQUEUE_HOST_BUSY; } @@ -5869,19 +6121,17 @@ static int ipr_queuecommand(struct Scsi_Host *shost, * FIXME - Create scsi_set_host_offline interface * and the ioa_is_dead check can be removed */ - if (unlikely(ioa_cfg->ioa_is_dead || !res)) { - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(hrrq->ioa_is_dead || !res)) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); goto err_nodev; } - if (ipr_is_gata(res) && res->sata_port) { - rc = ata_sas_queuecmd(scsi_cmd, res->sata_port->ap); - spin_unlock_irqrestore(shost->host_lock, lock_flags); - return rc; + ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq); + if (ipr_cmd == NULL) { + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); + return SCSI_MLQUEUE_HOST_BUSY; } - - ipr_cmd = __ipr_get_free_ipr_cmnd(ioa_cfg); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); ipr_init_ipr_cmnd(ipr_cmd, ipr_scsi_done); ioarcb = &ipr_cmd->ioarcb; @@ -5902,26 +6152,27 @@ static int ipr_queuecommand(struct Scsi_Host *shost, } if (scsi_cmd->cmnd[0] >= 0xC0 && - (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) + (!ipr_is_gscsi(res) || scsi_cmd->cmnd[0] == IPR_QUERY_RSRC_STATE)) { ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; + } if (ioa_cfg->sis64) rc = ipr_build_ioadl64(ioa_cfg, ipr_cmd); else rc = ipr_build_ioadl(ioa_cfg, ipr_cmd); - spin_lock_irqsave(shost->host_lock, lock_flags); - if (unlikely(rc || (!ioa_cfg->allow_cmds && !ioa_cfg->ioa_is_dead))) { - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_lock_irqsave(hrrq->lock, hrrq_flags); + if (unlikely(rc || (!hrrq->allow_cmds && !hrrq->ioa_is_dead))) { + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); if (!rc) scsi_dma_unmap(scsi_cmd); return SCSI_MLQUEUE_HOST_BUSY; } - if (unlikely(ioa_cfg->ioa_is_dead)) { - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + if (unlikely(hrrq->ioa_is_dead)) { + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_free_q); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); scsi_dma_unmap(scsi_cmd); goto err_nodev; } @@ -5931,18 +6182,18 @@ static int ipr_queuecommand(struct Scsi_Host *shost, ioarcb->cmd_pkt.flags_hi |= IPR_FLAGS_HI_SYNC_COMPLETE; res->needs_sync_complete = 0; } - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &hrrq->hrrq_pending_q); ipr_trc_hook(ipr_cmd, IPR_TRACE_START, IPR_GET_RES_PHYS_LOC(res)); ipr_send_command(ipr_cmd); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return 0; err_nodev: - spin_lock_irqsave(shost->host_lock, lock_flags); + spin_lock_irqsave(hrrq->lock, hrrq_flags); memset(scsi_cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); scsi_cmd->result = (DID_NO_CONNECT << 16); scsi_cmd->scsi_done(scsi_cmd); - spin_unlock_irqrestore(shost->host_lock, lock_flags); + spin_unlock_irqrestore(hrrq->lock, hrrq_flags); return 0; } @@ -6040,7 +6291,7 @@ static void ipr_ata_phy_reset(struct ata_port *ap) spin_lock_irqsave(ioa_cfg->host->host_lock, flags); } - if (!ioa_cfg->allow_cmds) + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) goto out_unlock; rc = ipr_device_reset(ioa_cfg, res); @@ -6071,6 +6322,7 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc) struct ipr_sata_port *sata_port = qc->ap->private_data; struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg; struct ipr_cmnd *ipr_cmd; + struct ipr_hrr_queue *hrrq; unsigned long flags; spin_lock_irqsave(ioa_cfg->host->host_lock, flags); @@ -6080,11 +6332,15 @@ static void ipr_ata_post_internal(struct ata_queued_cmd *qc) spin_lock_irqsave(ioa_cfg->host->host_lock, flags); } - list_for_each_entry(ipr_cmd, &ioa_cfg->pending_q, queue) { - if (ipr_cmd->qc == qc) { - ipr_device_reset(ioa_cfg, sata_port->res); - break; + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + list_for_each_entry(ipr_cmd, &hrrq->hrrq_pending_q, queue) { + if (ipr_cmd->qc == qc) { + ipr_device_reset(ioa_cfg, sata_port->res); + break; + } } + spin_unlock(&hrrq->_lock); } spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); } @@ -6133,6 +6389,7 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd) struct ipr_resource_entry *res = sata_port->res; u32 ioasc = be32_to_cpu(ipr_cmd->s.ioasa.hdr.ioasc); + spin_lock(&ipr_cmd->hrrq->_lock); if (ipr_cmd->ioa_cfg->sis64) memcpy(&sata_port->ioasa, &ipr_cmd->s.ioasa64.u.gata, sizeof(struct ipr_ioasa_gata)); @@ -6148,7 +6405,8 @@ static void ipr_sata_done(struct ipr_cmnd *ipr_cmd) qc->err_mask |= __ac_err_mask(sata_port->ioasa.status); else qc->err_mask |= ac_err_mask(sata_port->ioasa.status); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + spin_unlock(&ipr_cmd->hrrq->_lock); ata_qc_complete(qc); } @@ -6244,6 +6502,48 @@ static void ipr_build_ata_ioadl(struct ipr_cmnd *ipr_cmd, } /** + * ipr_qc_defer - Get a free ipr_cmd + * @qc: queued command + * + * Return value: + * 0 if success + **/ +static int ipr_qc_defer(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct ipr_sata_port *sata_port = ap->private_data; + struct ipr_ioa_cfg *ioa_cfg = sata_port->ioa_cfg; + struct ipr_cmnd *ipr_cmd; + struct ipr_hrr_queue *hrrq; + int hrrq_id; + + hrrq_id = ipr_get_hrrq_index(ioa_cfg); + hrrq = &ioa_cfg->hrrq[hrrq_id]; + + qc->lldd_task = NULL; + spin_lock(&hrrq->_lock); + if (unlikely(hrrq->ioa_is_dead)) { + spin_unlock(&hrrq->_lock); + return 0; + } + + if (unlikely(!hrrq->allow_cmds)) { + spin_unlock(&hrrq->_lock); + return ATA_DEFER_LINK; + } + + ipr_cmd = __ipr_get_free_ipr_cmnd(hrrq); + if (ipr_cmd == NULL) { + spin_unlock(&hrrq->_lock); + return ATA_DEFER_LINK; + } + + qc->lldd_task = ipr_cmd; + spin_unlock(&hrrq->_lock); + return 0; +} + +/** * ipr_qc_issue - Issue a SATA qc to a device * @qc: queued command * @@ -6260,10 +6560,23 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) struct ipr_ioarcb *ioarcb; struct ipr_ioarcb_ata_regs *regs; - if (unlikely(!ioa_cfg->allow_cmds || ioa_cfg->ioa_is_dead)) + if (qc->lldd_task == NULL) + ipr_qc_defer(qc); + + ipr_cmd = qc->lldd_task; + if (ipr_cmd == NULL) return AC_ERR_SYSTEM; - ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); + qc->lldd_task = NULL; + spin_lock(&ipr_cmd->hrrq->_lock); + if (unlikely(!ipr_cmd->hrrq->allow_cmds || + ipr_cmd->hrrq->ioa_is_dead)) { + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + spin_unlock(&ipr_cmd->hrrq->_lock); + return AC_ERR_SYSTEM; + } + + ipr_init_ipr_cmnd(ipr_cmd, ipr_lock_and_done); ioarcb = &ipr_cmd->ioarcb; if (ioa_cfg->sis64) { @@ -6275,7 +6588,7 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) memset(regs, 0, sizeof(*regs)); ioarcb->add_cmd_parms_len = cpu_to_be16(sizeof(*regs)); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->qc = qc; ipr_cmd->done = ipr_sata_done; ipr_cmd->ioarcb.res_handle = res->res_handle; @@ -6315,10 +6628,12 @@ static unsigned int ipr_qc_issue(struct ata_queued_cmd *qc) default: WARN_ON(1); + spin_unlock(&ipr_cmd->hrrq->_lock); return AC_ERR_INVALID; } ipr_send_command(ipr_cmd); + spin_unlock(&ipr_cmd->hrrq->_lock); return 0; } @@ -6357,6 +6672,7 @@ static struct ata_port_operations ipr_sata_ops = { .hardreset = ipr_sata_reset, .post_internal_cmd = ipr_ata_post_internal, .qc_prep = ata_noop_qc_prep, + .qc_defer = ipr_qc_defer, .qc_issue = ipr_qc_issue, .qc_fill_rtf = ipr_qc_fill_rtf, .port_start = ata_sas_port_start, @@ -6427,7 +6743,7 @@ static int ipr_ioa_bringdown_done(struct ipr_cmnd *ipr_cmd) ENTER; ioa_cfg->in_reset_reload = 0; ioa_cfg->reset_retries = 0; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); spin_unlock_irq(ioa_cfg->host->host_lock); @@ -6454,11 +6770,16 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ipr_resource_entry *res; struct ipr_hostrcb *hostrcb, *temp; - int i = 0; + int i = 0, j; ENTER; ioa_cfg->in_reset_reload = 0; - ioa_cfg->allow_cmds = 1; + for (j = 0; j < ioa_cfg->hrrq_num; j++) { + spin_lock(&ioa_cfg->hrrq[j]._lock); + ioa_cfg->hrrq[j].allow_cmds = 1; + spin_unlock(&ioa_cfg->hrrq[j]._lock); + } + wmb(); ioa_cfg->reset_cmd = NULL; ioa_cfg->doorbell |= IPR_RUNTIME_RESET; @@ -6482,14 +6803,14 @@ static int ipr_ioa_reset_done(struct ipr_cmnd *ipr_cmd) dev_info(&ioa_cfg->pdev->dev, "IOA initialized.\n"); ioa_cfg->reset_retries = 0; - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); wake_up_all(&ioa_cfg->reset_wait_q); spin_unlock(ioa_cfg->host->host_lock); scsi_unblock_requests(ioa_cfg->host); spin_lock(ioa_cfg->host->host_lock); - if (!ioa_cfg->allow_cmds) + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) scsi_block_requests(ioa_cfg->host); LEAVE; @@ -6560,9 +6881,11 @@ static int ipr_set_supported_devs(struct ipr_cmnd *ipr_cmd) if (!ioa_cfg->sis64) ipr_cmd->job_step = ipr_set_supported_devs; + LEAVE; return IPR_RC_JOB_RETURN; } + LEAVE; return IPR_RC_JOB_CONTINUE; } @@ -6820,7 +7143,7 @@ static int ipr_reset_cmd_failed(struct ipr_cmnd *ipr_cmd) ipr_cmd->ioarcb.cmd_pkt.cdb[0], ioasc); ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); return IPR_RC_JOB_RETURN; } @@ -7278,46 +7601,71 @@ static int ipr_ioafp_identify_hrrq(struct ipr_cmnd *ipr_cmd) { struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; struct ipr_ioarcb *ioarcb = &ipr_cmd->ioarcb; + struct ipr_hrr_queue *hrrq; ENTER; + ipr_cmd->job_step = ipr_ioafp_std_inquiry; dev_info(&ioa_cfg->pdev->dev, "Starting IOA initialization sequence.\n"); - ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q; - ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); + if (ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num) { + hrrq = &ioa_cfg->hrrq[ioa_cfg->identify_hrrq_index]; - ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; - if (ioa_cfg->sis64) - ioarcb->cmd_pkt.cdb[1] = 0x1; - ioarcb->cmd_pkt.cdb[2] = - ((u64) ioa_cfg->host_rrq_dma >> 24) & 0xff; - ioarcb->cmd_pkt.cdb[3] = - ((u64) ioa_cfg->host_rrq_dma >> 16) & 0xff; - ioarcb->cmd_pkt.cdb[4] = - ((u64) ioa_cfg->host_rrq_dma >> 8) & 0xff; - ioarcb->cmd_pkt.cdb[5] = - ((u64) ioa_cfg->host_rrq_dma) & 0xff; - ioarcb->cmd_pkt.cdb[7] = - ((sizeof(u32) * IPR_NUM_CMD_BLKS) >> 8) & 0xff; - ioarcb->cmd_pkt.cdb[8] = - (sizeof(u32) * IPR_NUM_CMD_BLKS) & 0xff; + ioarcb->cmd_pkt.cdb[0] = IPR_ID_HOST_RR_Q; + ioarcb->res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); - if (ioa_cfg->sis64) { - ioarcb->cmd_pkt.cdb[10] = - ((u64) ioa_cfg->host_rrq_dma >> 56) & 0xff; - ioarcb->cmd_pkt.cdb[11] = - ((u64) ioa_cfg->host_rrq_dma >> 48) & 0xff; - ioarcb->cmd_pkt.cdb[12] = - ((u64) ioa_cfg->host_rrq_dma >> 40) & 0xff; - ioarcb->cmd_pkt.cdb[13] = - ((u64) ioa_cfg->host_rrq_dma >> 32) & 0xff; - } + ioarcb->cmd_pkt.request_type = IPR_RQTYPE_IOACMD; + if (ioa_cfg->sis64) + ioarcb->cmd_pkt.cdb[1] = 0x1; - ipr_cmd->job_step = ipr_ioafp_std_inquiry; + if (ioa_cfg->nvectors == 1) + ioarcb->cmd_pkt.cdb[1] &= ~IPR_ID_HRRQ_SELE_ENABLE; + else + ioarcb->cmd_pkt.cdb[1] |= IPR_ID_HRRQ_SELE_ENABLE; + + ioarcb->cmd_pkt.cdb[2] = + ((u64) hrrq->host_rrq_dma >> 24) & 0xff; + ioarcb->cmd_pkt.cdb[3] = + ((u64) hrrq->host_rrq_dma >> 16) & 0xff; + ioarcb->cmd_pkt.cdb[4] = + ((u64) hrrq->host_rrq_dma >> 8) & 0xff; + ioarcb->cmd_pkt.cdb[5] = + ((u64) hrrq->host_rrq_dma) & 0xff; + ioarcb->cmd_pkt.cdb[7] = + ((sizeof(u32) * hrrq->size) >> 8) & 0xff; + ioarcb->cmd_pkt.cdb[8] = + (sizeof(u32) * hrrq->size) & 0xff; + + if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE) + ioarcb->cmd_pkt.cdb[9] = + ioa_cfg->identify_hrrq_index; - ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, IPR_INTERNAL_TIMEOUT); + if (ioa_cfg->sis64) { + ioarcb->cmd_pkt.cdb[10] = + ((u64) hrrq->host_rrq_dma >> 56) & 0xff; + ioarcb->cmd_pkt.cdb[11] = + ((u64) hrrq->host_rrq_dma >> 48) & 0xff; + ioarcb->cmd_pkt.cdb[12] = + ((u64) hrrq->host_rrq_dma >> 40) & 0xff; + ioarcb->cmd_pkt.cdb[13] = + ((u64) hrrq->host_rrq_dma >> 32) & 0xff; + } + + if (ioarcb->cmd_pkt.cdb[1] & IPR_ID_HRRQ_SELE_ENABLE) + ioarcb->cmd_pkt.cdb[14] = + ioa_cfg->identify_hrrq_index; + + ipr_do_req(ipr_cmd, ipr_reset_ioa_job, ipr_timeout, + IPR_INTERNAL_TIMEOUT); + + if (++ioa_cfg->identify_hrrq_index < ioa_cfg->hrrq_num) + ipr_cmd->job_step = ipr_ioafp_identify_hrrq; + + LEAVE; + return IPR_RC_JOB_RETURN; + } LEAVE; - return IPR_RC_JOB_RETURN; + return IPR_RC_JOB_CONTINUE; } /** @@ -7365,7 +7713,9 @@ static void ipr_reset_timer_done(struct ipr_cmnd *ipr_cmd) static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd, unsigned long timeout) { - list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q); + + ENTER; + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = ipr_reset_ioa_job; ipr_cmd->timer.data = (unsigned long) ipr_cmd; @@ -7383,13 +7733,26 @@ static void ipr_reset_start_timer(struct ipr_cmnd *ipr_cmd, **/ static void ipr_init_ioa_mem(struct ipr_ioa_cfg *ioa_cfg) { - memset(ioa_cfg->host_rrq, 0, sizeof(u32) * IPR_NUM_CMD_BLKS); + struct ipr_hrr_queue *hrrq; + + for_each_hrrq(hrrq, ioa_cfg) { + spin_lock(&hrrq->_lock); + memset(hrrq->host_rrq, 0, sizeof(u32) * hrrq->size); + + /* Initialize Host RRQ pointers */ + hrrq->hrrq_start = hrrq->host_rrq; + hrrq->hrrq_end = &hrrq->host_rrq[hrrq->size - 1]; + hrrq->hrrq_curr = hrrq->hrrq_start; + hrrq->toggle_bit = 1; + spin_unlock(&hrrq->_lock); + } + wmb(); - /* Initialize Host RRQ pointers */ - ioa_cfg->hrrq_start = ioa_cfg->host_rrq; - ioa_cfg->hrrq_end = &ioa_cfg->host_rrq[IPR_NUM_CMD_BLKS - 1]; - ioa_cfg->hrrq_curr = ioa_cfg->hrrq_start; - ioa_cfg->toggle_bit = 1; + ioa_cfg->identify_hrrq_index = 0; + if (ioa_cfg->hrrq_num == 1) + atomic_set(&ioa_cfg->hrrq_index, 0); + else + atomic_set(&ioa_cfg->hrrq_index, 1); /* Zero out config table */ memset(ioa_cfg->u.cfg_table, 0, ioa_cfg->cfg_table_size); @@ -7446,7 +7809,8 @@ static int ipr_reset_next_stage(struct ipr_cmnd *ipr_cmd) ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout; ipr_cmd->done = ipr_reset_ioa_job; add_timer(&ipr_cmd->timer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); return IPR_RC_JOB_RETURN; } @@ -7466,12 +7830,18 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd) struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; volatile u32 int_reg; volatile u64 maskval; + int i; ENTER; ipr_cmd->job_step = ipr_ioafp_identify_hrrq; ipr_init_ioa_mem(ioa_cfg); - ioa_cfg->allow_interrupts = 1; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 1; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); if (ioa_cfg->sis64) { /* Set the adapter to the correct endian mode. */ writel(IPR_ENDIAN_SWAP_KEY, ioa_cfg->regs.endian_swap_reg); @@ -7511,7 +7881,7 @@ static int ipr_reset_enable_ioa(struct ipr_cmnd *ipr_cmd) ipr_cmd->timer.function = (void (*)(unsigned long))ipr_oper_timeout; ipr_cmd->done = ipr_reset_ioa_job; add_timer(&ipr_cmd->timer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->pending_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); LEAVE; return IPR_RC_JOB_RETURN; @@ -8030,7 +8400,8 @@ static int ipr_reset_shutdown_ioa(struct ipr_cmnd *ipr_cmd) int rc = IPR_RC_JOB_CONTINUE; ENTER; - if (shutdown_type != IPR_SHUTDOWN_NONE && !ioa_cfg->ioa_is_dead) { + if (shutdown_type != IPR_SHUTDOWN_NONE && + !ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { ipr_cmd->ioarcb.res_handle = cpu_to_be32(IPR_IOA_RES_HANDLE); ipr_cmd->ioarcb.cmd_pkt.request_type = IPR_RQTYPE_IOACMD; ipr_cmd->ioarcb.cmd_pkt.cdb[0] = IPR_IOA_SHUTDOWN; @@ -8078,7 +8449,8 @@ static void ipr_reset_ioa_job(struct ipr_cmnd *ipr_cmd) * We are doing nested adapter resets and this is * not the current reset job. */ - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, + &ipr_cmd->hrrq->hrrq_free_q); return; } @@ -8113,9 +8485,15 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, enum ipr_shutdown_type shutdown_type) { struct ipr_cmnd *ipr_cmd; + int i; ioa_cfg->in_reset_reload = 1; - ioa_cfg->allow_cmds = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_cmds = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); scsi_block_requests(ioa_cfg->host); ipr_cmd = ipr_get_free_ipr_cmnd(ioa_cfg); @@ -8141,7 +8519,9 @@ static void _ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, enum ipr_shutdown_type shutdown_type) { - if (ioa_cfg->ioa_is_dead) + int i; + + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) return; if (ioa_cfg->in_reset_reload) { @@ -8156,7 +8536,12 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, "IOA taken offline - error recovery failed\n"); ioa_cfg->reset_retries = 0; - ioa_cfg->ioa_is_dead = 1; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].ioa_is_dead = 1; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); if (ioa_cfg->in_ioa_bringdown) { ioa_cfg->reset_cmd = NULL; @@ -8188,9 +8573,17 @@ static void ipr_initiate_ioa_reset(struct ipr_ioa_cfg *ioa_cfg, */ static int ipr_reset_freeze(struct ipr_cmnd *ipr_cmd) { + struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; + int i; + /* Disallow new interrupts, avoid loop */ - ipr_cmd->ioa_cfg->allow_interrupts = 0; - list_add_tail(&ipr_cmd->queue, &ipr_cmd->ioa_cfg->pending_q); + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_interrupts = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_pending_q); ipr_cmd->done = ipr_reset_ioa_job; return IPR_RC_JOB_RETURN; } @@ -8247,13 +8640,19 @@ static void ipr_pci_perm_failure(struct pci_dev *pdev) { unsigned long flags = 0; struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); + int i; spin_lock_irqsave(ioa_cfg->host->host_lock, flags); if (ioa_cfg->sdt_state == WAIT_FOR_DUMP) ioa_cfg->sdt_state = ABORT_DUMP; ioa_cfg->reset_retries = IPR_NUM_RESET_RELOAD_RETRIES; ioa_cfg->in_ioa_bringdown = 1; - ioa_cfg->allow_cmds = 0; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + spin_lock(&ioa_cfg->hrrq[i]._lock); + ioa_cfg->hrrq[i].allow_cmds = 0; + spin_unlock(&ioa_cfg->hrrq[i]._lock); + } + wmb(); ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE); spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); } @@ -8310,12 +8709,11 @@ static int ipr_probe_ioa_part2(struct ipr_ioa_cfg *ioa_cfg) } else _ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_enable_ioa, IPR_SHUTDOWN_NONE); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags); - if (ioa_cfg->ioa_is_dead) { + if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) { rc = -EIO; } else if (ipr_invalid_adapter(ioa_cfg)) { if (!ipr_testmode) @@ -8376,8 +8774,13 @@ static void ipr_free_mem(struct ipr_ioa_cfg *ioa_cfg) pci_free_consistent(ioa_cfg->pdev, sizeof(struct ipr_misc_cbs), ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma); ipr_free_cmd_blks(ioa_cfg); - pci_free_consistent(ioa_cfg->pdev, sizeof(u32) * IPR_NUM_CMD_BLKS, - ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma); + + for (i = 0; i < ioa_cfg->hrrq_num; i++) + pci_free_consistent(ioa_cfg->pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + pci_free_consistent(ioa_cfg->pdev, ioa_cfg->cfg_table_size, ioa_cfg->u.cfg_table, ioa_cfg->cfg_table_dma); @@ -8408,8 +8811,23 @@ static void ipr_free_all_resources(struct ipr_ioa_cfg *ioa_cfg) struct pci_dev *pdev = ioa_cfg->pdev; ENTER; - free_irq(pdev->irq, ioa_cfg); - pci_disable_msi(pdev); + if (ioa_cfg->intr_flag == IPR_USE_MSI || + ioa_cfg->intr_flag == IPR_USE_MSIX) { + int i; + for (i = 0; i < ioa_cfg->nvectors; i++) + free_irq(ioa_cfg->vectors_info[i].vec, + &ioa_cfg->hrrq[i]); + } else + free_irq(pdev->irq, &ioa_cfg->hrrq[0]); + + if (ioa_cfg->intr_flag == IPR_USE_MSI) { + pci_disable_msi(pdev); + ioa_cfg->intr_flag &= ~IPR_USE_MSI; + } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) { + pci_disable_msix(pdev); + ioa_cfg->intr_flag &= ~IPR_USE_MSIX; + } + iounmap(ioa_cfg->hdw_dma_regs); pci_release_regions(pdev); ipr_free_mem(ioa_cfg); @@ -8430,7 +8848,7 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) struct ipr_cmnd *ipr_cmd; struct ipr_ioarcb *ioarcb; dma_addr_t dma_addr; - int i; + int i, entries_each_hrrq, hrrq_id = 0; ioa_cfg->ipr_cmd_pool = pci_pool_create(IPR_NAME, ioa_cfg->pdev, sizeof(struct ipr_cmnd), 512, 0); @@ -8446,6 +8864,41 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) return -ENOMEM; } + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + if (ioa_cfg->hrrq_num > 1) { + if (i == 0) { + entries_each_hrrq = IPR_NUM_INTERNAL_CMD_BLKS; + ioa_cfg->hrrq[i].min_cmd_id = 0; + ioa_cfg->hrrq[i].max_cmd_id = + (entries_each_hrrq - 1); + } else { + entries_each_hrrq = + IPR_NUM_BASE_CMD_BLKS/ + (ioa_cfg->hrrq_num - 1); + ioa_cfg->hrrq[i].min_cmd_id = + IPR_NUM_INTERNAL_CMD_BLKS + + (i - 1) * entries_each_hrrq; + ioa_cfg->hrrq[i].max_cmd_id = + (IPR_NUM_INTERNAL_CMD_BLKS + + i * entries_each_hrrq - 1); + } + } else { + entries_each_hrrq = IPR_NUM_CMD_BLKS; + ioa_cfg->hrrq[i].min_cmd_id = 0; + ioa_cfg->hrrq[i].max_cmd_id = (entries_each_hrrq - 1); + } + ioa_cfg->hrrq[i].size = entries_each_hrrq; + } + + BUG_ON(ioa_cfg->hrrq_num == 0); + + i = IPR_NUM_CMD_BLKS - + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id - 1; + if (i > 0) { + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].size += i; + ioa_cfg->hrrq[ioa_cfg->hrrq_num - 1].max_cmd_id += i; + } + for (i = 0; i < IPR_NUM_CMD_BLKS; i++) { ipr_cmd = pci_pool_alloc(ioa_cfg->ipr_cmd_pool, GFP_KERNEL, &dma_addr); @@ -8484,7 +8937,11 @@ static int ipr_alloc_cmd_blks(struct ipr_ioa_cfg *ioa_cfg) ipr_cmd->sense_buffer_dma = dma_addr + offsetof(struct ipr_cmnd, sense_buffer); - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + ipr_cmd->ioarcb.cmd_pkt.hrrq_id = hrrq_id; + ipr_cmd->hrrq = &ioa_cfg->hrrq[hrrq_id]; + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); + if (i >= ioa_cfg->hrrq[hrrq_id].max_cmd_id) + hrrq_id++; } return 0; @@ -8516,6 +8973,10 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL); ioa_cfg->vset_ids = kzalloc(sizeof(unsigned long) * BITS_TO_LONGS(ioa_cfg->max_devs_supported), GFP_KERNEL); + + if (!ioa_cfg->target_ids || !ioa_cfg->array_ids + || !ioa_cfg->vset_ids) + goto out_free_res_entries; } for (i = 0; i < ioa_cfg->max_devs_supported; i++) { @@ -8530,15 +8991,34 @@ static int ipr_alloc_mem(struct ipr_ioa_cfg *ioa_cfg) if (!ioa_cfg->vpd_cbs) goto out_free_res_entries; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_free_q); + INIT_LIST_HEAD(&ioa_cfg->hrrq[i].hrrq_pending_q); + spin_lock_init(&ioa_cfg->hrrq[i]._lock); + if (i == 0) + ioa_cfg->hrrq[i].lock = ioa_cfg->host->host_lock; + else + ioa_cfg->hrrq[i].lock = &ioa_cfg->hrrq[i]._lock; + } + if (ipr_alloc_cmd_blks(ioa_cfg)) goto out_free_vpd_cbs; - ioa_cfg->host_rrq = pci_alloc_consistent(ioa_cfg->pdev, - sizeof(u32) * IPR_NUM_CMD_BLKS, - &ioa_cfg->host_rrq_dma); - - if (!ioa_cfg->host_rrq) - goto out_ipr_free_cmd_blocks; + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + ioa_cfg->hrrq[i].host_rrq = pci_alloc_consistent(ioa_cfg->pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + &ioa_cfg->hrrq[i].host_rrq_dma); + + if (!ioa_cfg->hrrq[i].host_rrq) { + while (--i > 0) + pci_free_consistent(pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + goto out_ipr_free_cmd_blocks; + } + ioa_cfg->hrrq[i].ioa_cfg = ioa_cfg; + } ioa_cfg->u.cfg_table = pci_alloc_consistent(ioa_cfg->pdev, ioa_cfg->cfg_table_size, @@ -8582,8 +9062,12 @@ out_free_hostrcb_dma: ioa_cfg->u.cfg_table, ioa_cfg->cfg_table_dma); out_free_host_rrq: - pci_free_consistent(pdev, sizeof(u32) * IPR_NUM_CMD_BLKS, - ioa_cfg->host_rrq, ioa_cfg->host_rrq_dma); + for (i = 0; i < ioa_cfg->hrrq_num; i++) { + pci_free_consistent(pdev, + sizeof(u32) * ioa_cfg->hrrq[i].size, + ioa_cfg->hrrq[i].host_rrq, + ioa_cfg->hrrq[i].host_rrq_dma); + } out_ipr_free_cmd_blocks: ipr_free_cmd_blks(ioa_cfg); out_free_vpd_cbs: @@ -8591,6 +9075,9 @@ out_free_vpd_cbs: ioa_cfg->vpd_cbs, ioa_cfg->vpd_cbs_dma); out_free_res_entries: kfree(ioa_cfg->res_entries); + kfree(ioa_cfg->target_ids); + kfree(ioa_cfg->array_ids); + kfree(ioa_cfg->vset_ids); goto out; } @@ -8638,15 +9125,11 @@ static void ipr_init_ioa_cfg(struct ipr_ioa_cfg *ioa_cfg, ioa_cfg->doorbell = IPR_DOORBELL; sprintf(ioa_cfg->eye_catcher, IPR_EYECATCHER); sprintf(ioa_cfg->trace_start, IPR_TRACE_START_LABEL); - sprintf(ioa_cfg->ipr_free_label, IPR_FREEQ_LABEL); - sprintf(ioa_cfg->ipr_pending_label, IPR_PENDQ_LABEL); sprintf(ioa_cfg->cfg_table_start, IPR_CFG_TBL_START); sprintf(ioa_cfg->resource_table_label, IPR_RES_TABLE_LABEL); sprintf(ioa_cfg->ipr_hcam_label, IPR_HCAM_LABEL); sprintf(ioa_cfg->ipr_cmd_label, IPR_CMD_LABEL); - INIT_LIST_HEAD(&ioa_cfg->free_q); - INIT_LIST_HEAD(&ioa_cfg->pending_q); INIT_LIST_HEAD(&ioa_cfg->hostrcb_free_q); INIT_LIST_HEAD(&ioa_cfg->hostrcb_pending_q); INIT_LIST_HEAD(&ioa_cfg->free_res_q); @@ -8724,6 +9207,88 @@ ipr_get_chip_info(const struct pci_device_id *dev_id) return NULL; } +static int ipr_enable_msix(struct ipr_ioa_cfg *ioa_cfg) +{ + struct msix_entry entries[IPR_MAX_MSIX_VECTORS]; + int i, err, vectors; + + for (i = 0; i < ARRAY_SIZE(entries); ++i) + entries[i].entry = i; + + vectors = ipr_number_of_msix; + + while ((err = pci_enable_msix(ioa_cfg->pdev, entries, vectors)) > 0) + vectors = err; + + if (err < 0) { + pci_disable_msix(ioa_cfg->pdev); + return err; + } + + if (!err) { + for (i = 0; i < vectors; i++) + ioa_cfg->vectors_info[i].vec = entries[i].vector; + ioa_cfg->nvectors = vectors; + } + + return err; +} + +static int ipr_enable_msi(struct ipr_ioa_cfg *ioa_cfg) +{ + int i, err, vectors; + + vectors = ipr_number_of_msix; + + while ((err = pci_enable_msi_block(ioa_cfg->pdev, vectors)) > 0) + vectors = err; + + if (err < 0) { + pci_disable_msi(ioa_cfg->pdev); + return err; + } + + if (!err) { + for (i = 0; i < vectors; i++) + ioa_cfg->vectors_info[i].vec = ioa_cfg->pdev->irq + i; + ioa_cfg->nvectors = vectors; + } + + return err; +} + +static void name_msi_vectors(struct ipr_ioa_cfg *ioa_cfg) +{ + int vec_idx, n = sizeof(ioa_cfg->vectors_info[0].desc) - 1; + + for (vec_idx = 0; vec_idx < ioa_cfg->nvectors; vec_idx++) { + snprintf(ioa_cfg->vectors_info[vec_idx].desc, n, + "host%d-%d", ioa_cfg->host->host_no, vec_idx); + ioa_cfg->vectors_info[vec_idx]. + desc[strlen(ioa_cfg->vectors_info[vec_idx].desc)] = 0; + } +} + +static int ipr_request_other_msi_irqs(struct ipr_ioa_cfg *ioa_cfg) +{ + int i, rc; + + for (i = 1; i < ioa_cfg->nvectors; i++) { + rc = request_irq(ioa_cfg->vectors_info[i].vec, + ipr_isr_mhrrq, + 0, + ioa_cfg->vectors_info[i].desc, + &ioa_cfg->hrrq[i]); + if (rc) { + while (--i >= 0) + free_irq(ioa_cfg->vectors_info[i].vec, + &ioa_cfg->hrrq[i]); + return rc; + } + } + return 0; +} + /** * ipr_test_intr - Handle the interrupt generated in ipr_test_msi(). * @pdev: PCI device struct @@ -8740,6 +9305,7 @@ static irqreturn_t ipr_test_intr(int irq, void *devp) unsigned long lock_flags = 0; irqreturn_t rc = IRQ_HANDLED; + dev_info(&ioa_cfg->pdev->dev, "Received IRQ : %d\n", irq); spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ioa_cfg->msi_received = 1; @@ -8787,9 +9353,9 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) writel(IPR_PCII_IO_DEBUG_ACKNOWLEDGE, ioa_cfg->regs.sense_interrupt_reg32); int_reg = readl(ioa_cfg->regs.sense_interrupt_reg); wait_event_timeout(ioa_cfg->msi_wait_q, ioa_cfg->msi_received, HZ); + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER); - spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); if (!ioa_cfg->msi_received) { /* MSI test failed */ dev_info(&pdev->dev, "MSI test failed. Falling back to LSI.\n"); @@ -8806,8 +9372,7 @@ static int ipr_test_msi(struct ipr_ioa_cfg *ioa_cfg, struct pci_dev *pdev) return rc; } -/** - * ipr_probe_ioa - Allocates memory and does first stage of initialization + /* ipr_probe_ioa - Allocates memory and does first stage of initialization * @pdev: PCI device struct * @dev_id: PCI device id struct * @@ -8823,6 +9388,7 @@ static int ipr_probe_ioa(struct pci_dev *pdev, void __iomem *ipr_regs; int rc = PCIBIOS_SUCCESSFUL; volatile u32 mask, uproc, interrupts; + unsigned long lock_flags; ENTER; @@ -8918,17 +9484,56 @@ static int ipr_probe_ioa(struct pci_dev *pdev, goto cleanup_nomem; } - /* Enable MSI style interrupts if they are supported. */ - if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && !pci_enable_msi(pdev)) { + if (ipr_number_of_msix > IPR_MAX_MSIX_VECTORS) { + dev_err(&pdev->dev, "The max number of MSIX is %d\n", + IPR_MAX_MSIX_VECTORS); + ipr_number_of_msix = IPR_MAX_MSIX_VECTORS; + } + + if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && + ipr_enable_msix(ioa_cfg) == 0) + ioa_cfg->intr_flag = IPR_USE_MSIX; + else if (ioa_cfg->ipr_chip->intr_type == IPR_USE_MSI && + ipr_enable_msi(ioa_cfg) == 0) + ioa_cfg->intr_flag = IPR_USE_MSI; + else { + ioa_cfg->intr_flag = IPR_USE_LSI; + ioa_cfg->nvectors = 1; + dev_info(&pdev->dev, "Cannot enable MSI.\n"); + } + + if (ioa_cfg->intr_flag == IPR_USE_MSI || + ioa_cfg->intr_flag == IPR_USE_MSIX) { rc = ipr_test_msi(ioa_cfg, pdev); - if (rc == -EOPNOTSUPP) - pci_disable_msi(pdev); + if (rc == -EOPNOTSUPP) { + if (ioa_cfg->intr_flag == IPR_USE_MSI) { + ioa_cfg->intr_flag &= ~IPR_USE_MSI; + pci_disable_msi(pdev); + } else if (ioa_cfg->intr_flag == IPR_USE_MSIX) { + ioa_cfg->intr_flag &= ~IPR_USE_MSIX; + pci_disable_msix(pdev); + } + + ioa_cfg->intr_flag = IPR_USE_LSI; + ioa_cfg->nvectors = 1; + } else if (rc) goto out_msi_disable; - else - dev_info(&pdev->dev, "MSI enabled with IRQ: %d\n", pdev->irq); - } else if (ipr_debug) - dev_info(&pdev->dev, "Cannot enable MSI.\n"); + else { + if (ioa_cfg->intr_flag == IPR_USE_MSI) + dev_info(&pdev->dev, + "Request for %d MSIs succeeded with starting IRQ: %d\n", + ioa_cfg->nvectors, pdev->irq); + else if (ioa_cfg->intr_flag == IPR_USE_MSIX) + dev_info(&pdev->dev, + "Request for %d MSIXs succeeded.", + ioa_cfg->nvectors); + } + } + + ioa_cfg->hrrq_num = min3(ioa_cfg->nvectors, + (unsigned int)num_online_cpus(), + (unsigned int)IPR_MAX_HRRQ_NUM); /* Save away PCI config space for use following IOA reset */ rc = pci_save_state(pdev); @@ -8975,11 +9580,24 @@ static int ipr_probe_ioa(struct pci_dev *pdev, if (interrupts & IPR_PCII_IOA_UNIT_CHECKED) ioa_cfg->ioa_unit_checked = 1; + spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); ipr_mask_and_clear_interrupts(ioa_cfg, ~IPR_PCII_IOA_TRANS_TO_OPER); - rc = request_irq(pdev->irq, ipr_isr, - ioa_cfg->msi_received ? 0 : IRQF_SHARED, - IPR_NAME, ioa_cfg); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); + if (ioa_cfg->intr_flag == IPR_USE_MSI + || ioa_cfg->intr_flag == IPR_USE_MSIX) { + name_msi_vectors(ioa_cfg); + rc = request_irq(ioa_cfg->vectors_info[0].vec, ipr_isr, + 0, + ioa_cfg->vectors_info[0].desc, + &ioa_cfg->hrrq[0]); + if (!rc) + rc = ipr_request_other_msi_irqs(ioa_cfg); + } else { + rc = request_irq(pdev->irq, ipr_isr, + IRQF_SHARED, + IPR_NAME, &ioa_cfg->hrrq[0]); + } if (rc) { dev_err(&pdev->dev, "Couldn't register IRQ %d! rc=%d\n", pdev->irq, rc); @@ -9004,7 +9622,10 @@ out: cleanup_nolog: ipr_free_mem(ioa_cfg); out_msi_disable: - pci_disable_msi(pdev); + if (ioa_cfg->intr_flag == IPR_USE_MSI) + pci_disable_msi(pdev); + else if (ioa_cfg->intr_flag == IPR_USE_MSIX) + pci_disable_msix(pdev); cleanup_nomem: iounmap(ipr_regs); out_release_regions: @@ -9138,7 +9759,7 @@ static void ipr_remove(struct pci_dev *pdev) static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) { struct ipr_ioa_cfg *ioa_cfg; - int rc; + int rc, i; rc = ipr_probe_ioa(pdev, dev_id); @@ -9185,6 +9806,17 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) scsi_add_device(ioa_cfg->host, IPR_IOA_BUS, IPR_IOA_TARGET, IPR_IOA_LUN); ioa_cfg->allow_ml_add_del = 1; ioa_cfg->host->max_channel = IPR_VSET_BUS; + ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight; + + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + for (i = 1; i < ioa_cfg->hrrq_num; i++) { + blk_iopoll_init(&ioa_cfg->hrrq[i].iopoll, + ioa_cfg->iopoll_weight, ipr_iopoll); + blk_iopoll_enable(&ioa_cfg->hrrq[i].iopoll); + } + } + schedule_work(&ioa_cfg->work_q); return 0; } @@ -9203,8 +9835,16 @@ static void ipr_shutdown(struct pci_dev *pdev) { struct ipr_ioa_cfg *ioa_cfg = pci_get_drvdata(pdev); unsigned long lock_flags = 0; + int i; spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags); + if (blk_iopoll_enabled && ioa_cfg->iopoll_weight && + ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { + ioa_cfg->iopoll_weight = 0; + for (i = 1; i < ioa_cfg->hrrq_num; i++) + blk_iopoll_disable(&ioa_cfg->hrrq[i].iopoll); + } + while (ioa_cfg->in_reset_reload) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags); wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload); @@ -9277,6 +9917,8 @@ static struct pci_device_id ipr_pci_table[] = { { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57B2, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C0, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C3, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROC_FPGA_E2, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C4, 0, 0, 0 }, @@ -9290,6 +9932,14 @@ static struct pci_device_id ipr_pci_table[] = { PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57C8, 0, 0, 0 }, { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57CE, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D5, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D6, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D7, 0, 0, 0 }, + { PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_CROCODILE, + PCI_VENDOR_ID_IBM, IPR_SUBS_DEV_ID_57D8, 0, 0, 0 }, { } }; MODULE_DEVICE_TABLE(pci, ipr_pci_table); @@ -9316,9 +9966,7 @@ static struct pci_driver ipr_driver = { **/ static void ipr_halt_done(struct ipr_cmnd *ipr_cmd) { - struct ipr_ioa_cfg *ioa_cfg = ipr_cmd->ioa_cfg; - - list_add_tail(&ipr_cmd->queue, &ioa_cfg->free_q); + list_add_tail(&ipr_cmd->queue, &ipr_cmd->hrrq->hrrq_free_q); } /** @@ -9340,7 +9988,7 @@ static int ipr_halt(struct notifier_block *nb, ulong event, void *buf) list_for_each_entry(ioa_cfg, &ipr_ioa_head, queue) { spin_lock_irqsave(ioa_cfg->host->host_lock, flags); - if (!ioa_cfg->allow_cmds) { + if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) { spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); continue; } diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h index c8a137f83bb..1a9a246932a 100644 --- a/drivers/scsi/ipr.h +++ b/drivers/scsi/ipr.h @@ -32,14 +32,15 @@ #include <linux/libata.h> #include <linux/list.h> #include <linux/kref.h> +#include <linux/blk-iopoll.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> /* * Literals */ -#define IPR_DRIVER_VERSION "2.5.4" -#define IPR_DRIVER_DATE "(July 11, 2012)" +#define IPR_DRIVER_VERSION "2.6.0" +#define IPR_DRIVER_DATE "(November 16, 2012)" /* * IPR_MAX_CMD_PER_LUN: This defines the maximum number of outstanding @@ -82,6 +83,7 @@ #define IPR_SUBS_DEV_ID_57B4 0x033B #define IPR_SUBS_DEV_ID_57B2 0x035F +#define IPR_SUBS_DEV_ID_57C0 0x0352 #define IPR_SUBS_DEV_ID_57C3 0x0353 #define IPR_SUBS_DEV_ID_57C4 0x0354 #define IPR_SUBS_DEV_ID_57C6 0x0357 @@ -94,6 +96,10 @@ #define IPR_SUBS_DEV_ID_574D 0x0356 #define IPR_SUBS_DEV_ID_57C8 0x035D +#define IPR_SUBS_DEV_ID_57D5 0x03FB +#define IPR_SUBS_DEV_ID_57D6 0x03FC +#define IPR_SUBS_DEV_ID_57D7 0x03FF +#define IPR_SUBS_DEV_ID_57D8 0x03FE #define IPR_NAME "ipr" /* @@ -298,6 +304,9 @@ IPR_PCII_NO_HOST_RRQ | IPR_PCII_IOARRIN_LOST | IPR_PCII_MMIO_ERROR) * Misc literals */ #define IPR_NUM_IOADL_ENTRIES IPR_MAX_SGLIST +#define IPR_MAX_MSIX_VECTORS 0x5 +#define IPR_MAX_HRRQ_NUM 0x10 +#define IPR_INIT_HRRQ 0x0 /* * Adapter interface types @@ -404,7 +413,7 @@ struct ipr_config_table_entry64 { __be64 dev_id; __be64 lun; __be64 lun_wwn[2]; -#define IPR_MAX_RES_PATH_LENGTH 24 +#define IPR_MAX_RES_PATH_LENGTH 48 __be64 res_path; struct ipr_std_inq_data std_inq_data; u8 reserved2[4]; @@ -459,9 +468,39 @@ struct ipr_supported_device { u8 reserved2[16]; }__attribute__((packed, aligned (4))); +struct ipr_hrr_queue { + struct ipr_ioa_cfg *ioa_cfg; + __be32 *host_rrq; + dma_addr_t host_rrq_dma; +#define IPR_HRRQ_REQ_RESP_HANDLE_MASK 0xfffffffc +#define IPR_HRRQ_RESP_BIT_SET 0x00000002 +#define IPR_HRRQ_TOGGLE_BIT 0x00000001 +#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT 2 +#define IPR_ID_HRRQ_SELE_ENABLE 0x02 + volatile __be32 *hrrq_start; + volatile __be32 *hrrq_end; + volatile __be32 *hrrq_curr; + + struct list_head hrrq_free_q; + struct list_head hrrq_pending_q; + spinlock_t _lock; + spinlock_t *lock; + + volatile u32 toggle_bit; + u32 size; + u32 min_cmd_id; + u32 max_cmd_id; + u8 allow_interrupts:1; + u8 ioa_is_dead:1; + u8 allow_cmds:1; + + struct blk_iopoll iopoll; +}; + /* Command packet structure */ struct ipr_cmd_pkt { - __be16 reserved; /* Reserved by IOA */ + u8 reserved; /* Reserved by IOA */ + u8 hrrq_id; u8 request_type; #define IPR_RQTYPE_SCSICDB 0x00 #define IPR_RQTYPE_IOACMD 0x01 @@ -1022,6 +1061,10 @@ struct ipr_hostrcb64_fabric_desc { struct ipr_hostrcb64_config_element elem[1]; }__attribute__((packed, aligned (8))); +#define for_each_hrrq(hrrq, ioa_cfg) \ + for (hrrq = (ioa_cfg)->hrrq; \ + hrrq < ((ioa_cfg)->hrrq + (ioa_cfg)->hrrq_num); hrrq++) + #define for_each_fabric_cfg(fabric, cfg) \ for (cfg = (fabric)->elem; \ cfg < ((fabric)->elem + be16_to_cpu((fabric)->num_entries)); \ @@ -1308,6 +1351,7 @@ struct ipr_chip_cfg_t { u16 max_cmds; u8 cache_line_size; u8 clear_isr; + u32 iopoll_weight; struct ipr_interrupt_offsets regs; }; @@ -1317,6 +1361,7 @@ struct ipr_chip_t { u16 intr_type; #define IPR_USE_LSI 0x00 #define IPR_USE_MSI 0x01 +#define IPR_USE_MSIX 0x02 u16 sis_type; #define IPR_SIS32 0x00 #define IPR_SIS64 0x01 @@ -1375,13 +1420,10 @@ struct ipr_ioa_cfg { struct list_head queue; - u8 allow_interrupts:1; u8 in_reset_reload:1; u8 in_ioa_bringdown:1; u8 ioa_unit_checked:1; - u8 ioa_is_dead:1; u8 dump_taken:1; - u8 allow_cmds:1; u8 allow_ml_add_del:1; u8 needs_hard_reset:1; u8 dual_raid:1; @@ -1413,21 +1455,7 @@ struct ipr_ioa_cfg { char trace_start[8]; #define IPR_TRACE_START_LABEL "trace" struct ipr_trace_entry *trace; - u32 trace_index:IPR_NUM_TRACE_INDEX_BITS; - - /* - * Queue for free command blocks - */ - char ipr_free_label[8]; -#define IPR_FREEQ_LABEL "free-q" - struct list_head free_q; - - /* - * Queue for command blocks outstanding to the adapter - */ - char ipr_pending_label[8]; -#define IPR_PENDQ_LABEL "pend-q" - struct list_head pending_q; + atomic_t trace_index; char cfg_table_start[8]; #define IPR_CFG_TBL_START "cfg" @@ -1452,16 +1480,10 @@ struct ipr_ioa_cfg { struct list_head hostrcb_free_q; struct list_head hostrcb_pending_q; - __be32 *host_rrq; - dma_addr_t host_rrq_dma; -#define IPR_HRRQ_REQ_RESP_HANDLE_MASK 0xfffffffc -#define IPR_HRRQ_RESP_BIT_SET 0x00000002 -#define IPR_HRRQ_TOGGLE_BIT 0x00000001 -#define IPR_HRRQ_REQ_RESP_HANDLE_SHIFT 2 - volatile __be32 *hrrq_start; - volatile __be32 *hrrq_end; - volatile __be32 *hrrq_curr; - volatile u32 toggle_bit; + struct ipr_hrr_queue hrrq[IPR_MAX_HRRQ_NUM]; + u32 hrrq_num; + atomic_t hrrq_index; + u16 identify_hrrq_index; struct ipr_bus_attributes bus_attr[IPR_MAX_NUM_BUSES]; @@ -1507,6 +1529,17 @@ struct ipr_ioa_cfg { u32 max_cmds; struct ipr_cmnd **ipr_cmnd_list; dma_addr_t *ipr_cmnd_list_dma; + + u16 intr_flag; + unsigned int nvectors; + + struct { + unsigned short vec; + char desc[22]; + } vectors_info[IPR_MAX_MSIX_VECTORS]; + + u32 iopoll_weight; + }; /* struct ipr_ioa_cfg */ struct ipr_cmnd { @@ -1544,6 +1577,7 @@ struct ipr_cmnd { struct scsi_device *sdev; } u; + struct ipr_hrr_queue *hrrq; struct ipr_ioa_cfg *ioa_cfg; }; @@ -1717,7 +1751,8 @@ struct ipr_ucode_image_header { if (ipr_is_device(hostrcb)) { \ if ((hostrcb)->ioa_cfg->sis64) { \ printk(KERN_ERR IPR_NAME ": %s: " fmt, \ - ipr_format_res_path(hostrcb->hcam.u.error64.fd_res_path, \ + ipr_format_res_path(hostrcb->ioa_cfg, \ + hostrcb->hcam.u.error64.fd_res_path, \ hostrcb->rp_buffer, \ sizeof(hostrcb->rp_buffer)), \ __VA_ARGS__); \ diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index df4c13a5534..7706c99ec8b 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -466,11 +466,13 @@ enum intr_type_t { MSIX, }; +#define LPFC_CT_CTX_MAX 64 struct unsol_rcv_ct_ctx { uint32_t ctxt_id; uint32_t SID; - uint32_t flags; -#define UNSOL_VALID 0x00000001 + uint32_t valid; +#define UNSOL_INVALID 0 +#define UNSOL_VALID 1 uint16_t oxid; uint16_t rxid; }; @@ -750,6 +752,15 @@ struct lpfc_hba { void __iomem *ctrl_regs_memmap_p;/* Kernel memory mapped address for PCI BAR2 */ + void __iomem *pci_bar0_memmap_p; /* Kernel memory mapped address for + PCI BAR0 with dual-ULP support */ + void __iomem *pci_bar2_memmap_p; /* Kernel memory mapped address for + PCI BAR2 with dual-ULP support */ + void __iomem *pci_bar4_memmap_p; /* Kernel memory mapped address for + PCI BAR4 with dual-ULP support */ +#define PCI_64BIT_BAR0 0 +#define PCI_64BIT_BAR2 2 +#define PCI_64BIT_BAR4 4 void __iomem *MBslimaddr; /* virtual address for mbox cmds */ void __iomem *HAregaddr; /* virtual address for host attn reg */ void __iomem *CAregaddr; /* virtual address for chip attn reg */ @@ -938,7 +949,7 @@ struct lpfc_hba { spinlock_t ct_ev_lock; /* synchronize access to ct_ev_waiters */ struct list_head ct_ev_waiters; - struct unsol_rcv_ct_ctx ct_ctx[64]; + struct unsol_rcv_ct_ctx ct_ctx[LPFC_CT_CTX_MAX]; uint32_t ctx_idx; uint8_t menlo_flag; /* menlo generic flags */ diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index f7368eb8041..32d5683e618 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -955,9 +955,9 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, spin_lock_irqsave(&phba->ct_ev_lock, flags); if (phba->sli_rev == LPFC_SLI_REV4) { evt_dat->immed_dat = phba->ctx_idx; - phba->ctx_idx = (phba->ctx_idx + 1) % 64; + phba->ctx_idx = (phba->ctx_idx + 1) % LPFC_CT_CTX_MAX; /* Provide warning for over-run of the ct_ctx array */ - if (phba->ct_ctx[evt_dat->immed_dat].flags & + if (phba->ct_ctx[evt_dat->immed_dat].valid == UNSOL_VALID) lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, "2717 CT context array entry " @@ -973,7 +973,7 @@ lpfc_bsg_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, piocbq->iocb.unsli3.rcvsli3.ox_id; phba->ct_ctx[evt_dat->immed_dat].SID = piocbq->iocb.un.rcvels.remoteID; - phba->ct_ctx[evt_dat->immed_dat].flags = UNSOL_VALID; + phba->ct_ctx[evt_dat->immed_dat].valid = UNSOL_VALID; } else evt_dat->immed_dat = piocbq->iocb.ulpContext; @@ -1013,6 +1013,47 @@ error_ct_unsol_exit: } /** + * lpfc_bsg_ct_unsol_abort - handler ct abort to management plane + * @phba: Pointer to HBA context object. + * @dmabuf: pointer to a dmabuf that describes the FC sequence + * + * This function handles abort to the CT command toward management plane + * for SLI4 port. + * + * If the pending context of a CT command to management plane present, clears + * such context and returns 1 for handled; otherwise, it returns 0 indicating + * no context exists. + **/ +int +lpfc_bsg_ct_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf) +{ + struct fc_frame_header fc_hdr; + struct fc_frame_header *fc_hdr_ptr = &fc_hdr; + int ctx_idx, handled = 0; + uint16_t oxid, rxid; + uint32_t sid; + + memcpy(fc_hdr_ptr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header)); + sid = sli4_sid_from_fc_hdr(fc_hdr_ptr); + oxid = be16_to_cpu(fc_hdr_ptr->fh_ox_id); + rxid = be16_to_cpu(fc_hdr_ptr->fh_rx_id); + + for (ctx_idx = 0; ctx_idx < LPFC_CT_CTX_MAX; ctx_idx++) { + if (phba->ct_ctx[ctx_idx].valid != UNSOL_VALID) + continue; + if (phba->ct_ctx[ctx_idx].rxid != rxid) + continue; + if (phba->ct_ctx[ctx_idx].oxid != oxid) + continue; + if (phba->ct_ctx[ctx_idx].SID != sid) + continue; + phba->ct_ctx[ctx_idx].valid = UNSOL_INVALID; + handled = 1; + } + return handled; +} + +/** * lpfc_bsg_hba_set_event - process a SET_EVENT bsg vendor command * @job: SET_EVENT fc_bsg_job **/ @@ -1318,7 +1359,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag, icmd->ulpClass = CLASS3; if (phba->sli_rev == LPFC_SLI_REV4) { /* Do not issue unsol response if oxid not marked as valid */ - if (!(phba->ct_ctx[tag].flags & UNSOL_VALID)) { + if (phba->ct_ctx[tag].valid != UNSOL_VALID) { rc = IOCB_ERROR; goto issue_ct_rsp_exit; } @@ -1352,7 +1393,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag, phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]; /* The exchange is done, mark the entry as invalid */ - phba->ct_ctx[tag].flags &= ~UNSOL_VALID; + phba->ct_ctx[tag].valid = UNSOL_INVALID; } else icmd->ulpContext = (ushort) tag; diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 69d66e3662c..76ca65dae78 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -164,8 +164,7 @@ void lpfc_hb_timeout_handler(struct lpfc_hba *); void lpfc_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); -void lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, - struct lpfc_iocbq *); +int lpfc_ct_handle_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); int lpfc_ns_cmd(struct lpfc_vport *, int, uint8_t, uint32_t); int lpfc_fdmi_cmd(struct lpfc_vport *, struct lpfc_nodelist *, int); void lpfc_fdmi_tmo(unsigned long); @@ -427,6 +426,7 @@ int lpfc_bsg_request(struct fc_bsg_job *); int lpfc_bsg_timeout(struct fc_bsg_job *); int lpfc_bsg_ct_unsol_event(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); +int lpfc_bsg_ct_unsol_abort(struct lpfc_hba *, struct hbq_dmabuf *); void __lpfc_sli_ringtx_put(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *); struct lpfc_iocbq *lpfc_sli_ringtx_get(struct lpfc_hba *, diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 65f9fb6862e..7bff3a19af5 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -164,37 +164,24 @@ lpfc_ct_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, } /** - * lpfc_sli4_ct_abort_unsol_event - Default handle for sli4 unsol abort + * lpfc_ct_handle_unsol_abort - ct upper level protocol abort handler * @phba: Pointer to HBA context object. - * @pring: Pointer to the driver internal I/O ring. - * @piocbq: Pointer to the IOCBQ. + * @dmabuf: pointer to a dmabuf that describes the FC sequence * - * This function serves as the default handler for the sli4 unsolicited - * abort event. It shall be invoked when there is no application interface - * registered unsolicited abort handler. This handler does nothing but - * just simply releases the dma buffer used by the unsol abort event. + * This function serves as the upper level protocol abort handler for CT + * protocol. + * + * Return 1 if abort has been handled, 0 otherwise. **/ -void -lpfc_sli4_ct_abort_unsol_event(struct lpfc_hba *phba, - struct lpfc_sli_ring *pring, - struct lpfc_iocbq *piocbq) +int +lpfc_ct_handle_unsol_abort(struct lpfc_hba *phba, struct hbq_dmabuf *dmabuf) { - IOCB_t *icmd = &piocbq->iocb; - struct lpfc_dmabuf *bdeBuf; - uint32_t size; + int handled; - /* Forward abort event to any process registered to receive ct event */ - if (lpfc_bsg_ct_unsol_event(phba, pring, piocbq) == 0) - return; + /* CT upper level goes through BSG */ + handled = lpfc_bsg_ct_unsol_abort(phba, dmabuf); - /* If there is no BDE associated with IOCB, there is nothing to do */ - if (icmd->ulpBdeCount == 0) - return; - bdeBuf = piocbq->context2; - piocbq->context2 = NULL; - size = icmd->un.cont64[0].tus.f.bdeSize; - lpfc_ct_unsol_buffer(phba, piocbq, bdeBuf, size); - lpfc_in_buf_free(phba, bdeBuf); + return handled; } static void diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index b9440deaad4..08d156a9094 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -3122,6 +3122,13 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, case IOERR_SEQUENCE_TIMEOUT: case IOERR_INVALID_RPI: + if (cmd == ELS_CMD_PLOGI && + did == NameServer_DID) { + /* Continue forever if plogi to */ + /* the nameserver fails */ + maxretry = 0; + delay = 100; + } retry = 1; break; } @@ -6517,7 +6524,8 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, struct lpfc_nodelist *ndlp; struct ls_rjt stat; uint32_t *payload; - uint32_t cmd, did, newnode, rjt_err = 0; + uint32_t cmd, did, newnode; + uint8_t rjt_exp, rjt_err = 0; IOCB_t *icmd = &elsiocb->iocb; if (!vport || !(elsiocb->context2)) @@ -6606,12 +6614,14 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* If Nport discovery is delayed, reject PLOGIs */ if (vport->fc_flag & FC_DISC_DELAYED) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } if (vport->port_state < LPFC_DISC_AUTH) { if (!(phba->pport->fc_flag & FC_PT2PT) || (phba->pport->fc_flag & FC_PT2PT_PLOGI)) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } /* We get here, and drop thru, if we are PT2PT with @@ -6648,6 +6658,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_send_els_event(vport, ndlp, payload); if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_LOGO); @@ -6661,6 +6672,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, lpfc_send_els_event(vport, ndlp, payload); if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLO); @@ -6680,6 +6692,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvADISC++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, @@ -6693,6 +6706,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvPDISC++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, @@ -6730,6 +6744,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, phba->fc_stat.elsRcvPRLI++; if (vport->port_state < LPFC_DISC_AUTH) { rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_NOTHING_MORE; break; } lpfc_disc_state_machine(vport, ndlp, elsiocb, NLP_EVT_RCV_PRLI); @@ -6813,6 +6828,11 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (newnode) lpfc_nlp_put(ndlp); break; + case ELS_CMD_REC: + /* receive this due to exchange closed */ + rjt_err = LSRJT_UNABLE_TPC; + rjt_exp = LSEXP_INVALID_OX_RX; + break; default: lpfc_debugfs_disc_trc(vport, LPFC_DISC_TRC_ELS_UNSOL, "RCV ELS cmd: cmd:x%x did:x%x/ste:x%x", @@ -6820,6 +6840,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, /* Unsupported ELS command, reject */ rjt_err = LSRJT_CMD_UNSUPPORTED; + rjt_exp = LSEXP_NOTHING_MORE; /* Unknown ELS command <elsCmd> received from NPORT <did> */ lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, @@ -6834,7 +6855,7 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, if (rjt_err) { memset(&stat, 0, sizeof(stat)); stat.un.b.lsRjtRsnCode = rjt_err; - stat.un.b.lsRjtRsnCodeExp = LSEXP_NOTHING_MORE; + stat.un.b.lsRjtRsnCodeExp = rjt_exp; lpfc_els_rsp_reject(vport, stat.un.lsRjtError, elsiocb, ndlp, NULL); } diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 7398ca862e9..e8c47603170 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -538,6 +538,7 @@ struct fc_vft_header { #define ELS_CMD_ECHO 0x10000000 #define ELS_CMD_TEST 0x11000000 #define ELS_CMD_RRQ 0x12000000 +#define ELS_CMD_REC 0x13000000 #define ELS_CMD_PRLI 0x20100014 #define ELS_CMD_PRLO 0x21100014 #define ELS_CMD_PRLO_ACC 0x02100014 @@ -574,6 +575,7 @@ struct fc_vft_header { #define ELS_CMD_ECHO 0x10 #define ELS_CMD_TEST 0x11 #define ELS_CMD_RRQ 0x12 +#define ELS_CMD_REC 0x13 #define ELS_CMD_PRLI 0x14001020 #define ELS_CMD_PRLO 0x14001021 #define ELS_CMD_PRLO_ACC 0x14001002 diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index a47cfbdd05f..6e93b886cd4 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -106,6 +106,7 @@ struct lpfc_sli_intf { #define LPFC_SLI4_MB_WORD_COUNT 64 #define LPFC_MAX_MQ_PAGE 8 +#define LPFC_MAX_WQ_PAGE_V0 4 #define LPFC_MAX_WQ_PAGE 8 #define LPFC_MAX_CQ_PAGE 4 #define LPFC_MAX_EQ_PAGE 8 @@ -703,24 +704,41 @@ struct lpfc_register { * BAR0. The offsets are the same so the driver must account for * any base address difference. */ -#define LPFC_RQ_DOORBELL 0x00A0 -#define lpfc_rq_doorbell_num_posted_SHIFT 16 -#define lpfc_rq_doorbell_num_posted_MASK 0x3FFF -#define lpfc_rq_doorbell_num_posted_WORD word0 -#define lpfc_rq_doorbell_id_SHIFT 0 -#define lpfc_rq_doorbell_id_MASK 0xFFFF -#define lpfc_rq_doorbell_id_WORD word0 - -#define LPFC_WQ_DOORBELL 0x0040 -#define lpfc_wq_doorbell_num_posted_SHIFT 24 -#define lpfc_wq_doorbell_num_posted_MASK 0x00FF -#define lpfc_wq_doorbell_num_posted_WORD word0 -#define lpfc_wq_doorbell_index_SHIFT 16 -#define lpfc_wq_doorbell_index_MASK 0x00FF -#define lpfc_wq_doorbell_index_WORD word0 -#define lpfc_wq_doorbell_id_SHIFT 0 -#define lpfc_wq_doorbell_id_MASK 0xFFFF -#define lpfc_wq_doorbell_id_WORD word0 +#define LPFC_ULP0_RQ_DOORBELL 0x00A0 +#define LPFC_ULP1_RQ_DOORBELL 0x00C0 +#define lpfc_rq_db_list_fm_num_posted_SHIFT 24 +#define lpfc_rq_db_list_fm_num_posted_MASK 0x00FF +#define lpfc_rq_db_list_fm_num_posted_WORD word0 +#define lpfc_rq_db_list_fm_index_SHIFT 16 +#define lpfc_rq_db_list_fm_index_MASK 0x00FF +#define lpfc_rq_db_list_fm_index_WORD word0 +#define lpfc_rq_db_list_fm_id_SHIFT 0 +#define lpfc_rq_db_list_fm_id_MASK 0xFFFF +#define lpfc_rq_db_list_fm_id_WORD word0 +#define lpfc_rq_db_ring_fm_num_posted_SHIFT 16 +#define lpfc_rq_db_ring_fm_num_posted_MASK 0x3FFF +#define lpfc_rq_db_ring_fm_num_posted_WORD word0 +#define lpfc_rq_db_ring_fm_id_SHIFT 0 +#define lpfc_rq_db_ring_fm_id_MASK 0xFFFF +#define lpfc_rq_db_ring_fm_id_WORD word0 + +#define LPFC_ULP0_WQ_DOORBELL 0x0040 +#define LPFC_ULP1_WQ_DOORBELL 0x0060 +#define lpfc_wq_db_list_fm_num_posted_SHIFT 24 +#define lpfc_wq_db_list_fm_num_posted_MASK 0x00FF +#define lpfc_wq_db_list_fm_num_posted_WORD word0 +#define lpfc_wq_db_list_fm_index_SHIFT 16 +#define lpfc_wq_db_list_fm_index_MASK 0x00FF +#define lpfc_wq_db_list_fm_index_WORD word0 +#define lpfc_wq_db_list_fm_id_SHIFT 0 +#define lpfc_wq_db_list_fm_id_MASK 0xFFFF +#define lpfc_wq_db_list_fm_id_WORD word0 +#define lpfc_wq_db_ring_fm_num_posted_SHIFT 16 +#define lpfc_wq_db_ring_fm_num_posted_MASK 0x3FFF +#define lpfc_wq_db_ring_fm_num_posted_WORD word0 +#define lpfc_wq_db_ring_fm_id_SHIFT 0 +#define lpfc_wq_db_ring_fm_id_MASK 0xFFFF +#define lpfc_wq_db_ring_fm_id_WORD word0 #define LPFC_EQCQ_DOORBELL 0x0120 #define lpfc_eqcq_doorbell_se_SHIFT 31 @@ -1131,12 +1149,22 @@ struct lpfc_mbx_wq_create { struct { /* Version 0 Request */ uint32_t word0; #define lpfc_mbx_wq_create_num_pages_SHIFT 0 -#define lpfc_mbx_wq_create_num_pages_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_num_pages_MASK 0x000000FF #define lpfc_mbx_wq_create_num_pages_WORD word0 +#define lpfc_mbx_wq_create_dua_SHIFT 8 +#define lpfc_mbx_wq_create_dua_MASK 0x00000001 +#define lpfc_mbx_wq_create_dua_WORD word0 #define lpfc_mbx_wq_create_cq_id_SHIFT 16 #define lpfc_mbx_wq_create_cq_id_MASK 0x0000FFFF #define lpfc_mbx_wq_create_cq_id_WORD word0 - struct dma_address page[LPFC_MAX_WQ_PAGE]; + struct dma_address page[LPFC_MAX_WQ_PAGE_V0]; + uint32_t word9; +#define lpfc_mbx_wq_create_bua_SHIFT 0 +#define lpfc_mbx_wq_create_bua_MASK 0x00000001 +#define lpfc_mbx_wq_create_bua_WORD word9 +#define lpfc_mbx_wq_create_ulp_num_SHIFT 8 +#define lpfc_mbx_wq_create_ulp_num_MASK 0x000000FF +#define lpfc_mbx_wq_create_ulp_num_WORD word9 } request; struct { /* Version 1 Request */ uint32_t word0; /* Word 0 is the same as in v0 */ @@ -1160,6 +1188,17 @@ struct lpfc_mbx_wq_create { #define lpfc_mbx_wq_create_q_id_SHIFT 0 #define lpfc_mbx_wq_create_q_id_MASK 0x0000FFFF #define lpfc_mbx_wq_create_q_id_WORD word0 + uint32_t doorbell_offset; + uint32_t word2; +#define lpfc_mbx_wq_create_bar_set_SHIFT 0 +#define lpfc_mbx_wq_create_bar_set_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_bar_set_WORD word2 +#define WQ_PCI_BAR_0_AND_1 0x00 +#define WQ_PCI_BAR_2_AND_3 0x01 +#define WQ_PCI_BAR_4_AND_5 0x02 +#define lpfc_mbx_wq_create_db_format_SHIFT 16 +#define lpfc_mbx_wq_create_db_format_MASK 0x0000FFFF +#define lpfc_mbx_wq_create_db_format_WORD word2 } response; } u; }; @@ -1223,14 +1262,31 @@ struct lpfc_mbx_rq_create { #define lpfc_mbx_rq_create_num_pages_SHIFT 0 #define lpfc_mbx_rq_create_num_pages_MASK 0x0000FFFF #define lpfc_mbx_rq_create_num_pages_WORD word0 +#define lpfc_mbx_rq_create_dua_SHIFT 16 +#define lpfc_mbx_rq_create_dua_MASK 0x00000001 +#define lpfc_mbx_rq_create_dua_WORD word0 +#define lpfc_mbx_rq_create_bqu_SHIFT 17 +#define lpfc_mbx_rq_create_bqu_MASK 0x00000001 +#define lpfc_mbx_rq_create_bqu_WORD word0 +#define lpfc_mbx_rq_create_ulp_num_SHIFT 24 +#define lpfc_mbx_rq_create_ulp_num_MASK 0x000000FF +#define lpfc_mbx_rq_create_ulp_num_WORD word0 struct rq_context context; struct dma_address page[LPFC_MAX_WQ_PAGE]; } request; struct { uint32_t word0; -#define lpfc_mbx_rq_create_q_id_SHIFT 0 -#define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF -#define lpfc_mbx_rq_create_q_id_WORD word0 +#define lpfc_mbx_rq_create_q_id_SHIFT 0 +#define lpfc_mbx_rq_create_q_id_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_q_id_WORD word0 + uint32_t doorbell_offset; + uint32_t word2; +#define lpfc_mbx_rq_create_bar_set_SHIFT 0 +#define lpfc_mbx_rq_create_bar_set_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_bar_set_WORD word2 +#define lpfc_mbx_rq_create_db_format_SHIFT 16 +#define lpfc_mbx_rq_create_db_format_MASK 0x0000FFFF +#define lpfc_mbx_rq_create_db_format_WORD word2 } response; } u; }; @@ -1388,6 +1444,33 @@ struct lpfc_mbx_get_rsrc_extent_info { } u; }; +struct lpfc_mbx_query_fw_config { + struct mbox_header header; + struct { + uint32_t config_number; +#define LPFC_FC_FCOE 0x00000007 + uint32_t asic_revision; + uint32_t physical_port; + uint32_t function_mode; +#define LPFC_FCOE_INI_MODE 0x00000040 +#define LPFC_FCOE_TGT_MODE 0x00000080 +#define LPFC_DUA_MODE 0x00000800 + uint32_t ulp0_mode; +#define LPFC_ULP_FCOE_INIT_MODE 0x00000040 +#define LPFC_ULP_FCOE_TGT_MODE 0x00000080 + uint32_t ulp0_nap_words[12]; + uint32_t ulp1_mode; + uint32_t ulp1_nap_words[12]; + uint32_t function_capabilities; + uint32_t cqid_base; + uint32_t cqid_tot; + uint32_t eqid_base; + uint32_t eqid_tot; + uint32_t ulp0_nap2_words[2]; + uint32_t ulp1_nap2_words[2]; + } rsp; +}; + struct lpfc_id_range { uint32_t word5; #define lpfc_mbx_rsrc_id_word4_0_SHIFT 0 @@ -1803,51 +1886,6 @@ struct lpfc_mbx_redisc_fcf_tbl { #define lpfc_mbx_redisc_fcf_index_WORD word12 }; -struct lpfc_mbx_query_fw_cfg { - struct mbox_header header; - uint32_t config_number; - uint32_t asic_rev; - uint32_t phys_port; - uint32_t function_mode; -/* firmware Function Mode */ -#define lpfc_function_mode_toe_SHIFT 0 -#define lpfc_function_mode_toe_MASK 0x00000001 -#define lpfc_function_mode_toe_WORD function_mode -#define lpfc_function_mode_nic_SHIFT 1 -#define lpfc_function_mode_nic_MASK 0x00000001 -#define lpfc_function_mode_nic_WORD function_mode -#define lpfc_function_mode_rdma_SHIFT 2 -#define lpfc_function_mode_rdma_MASK 0x00000001 -#define lpfc_function_mode_rdma_WORD function_mode -#define lpfc_function_mode_vm_SHIFT 3 -#define lpfc_function_mode_vm_MASK 0x00000001 -#define lpfc_function_mode_vm_WORD function_mode -#define lpfc_function_mode_iscsi_i_SHIFT 4 -#define lpfc_function_mode_iscsi_i_MASK 0x00000001 -#define lpfc_function_mode_iscsi_i_WORD function_mode -#define lpfc_function_mode_iscsi_t_SHIFT 5 -#define lpfc_function_mode_iscsi_t_MASK 0x00000001 -#define lpfc_function_mode_iscsi_t_WORD function_mode -#define lpfc_function_mode_fcoe_i_SHIFT 6 -#define lpfc_function_mode_fcoe_i_MASK 0x00000001 -#define lpfc_function_mode_fcoe_i_WORD function_mode -#define lpfc_function_mode_fcoe_t_SHIFT 7 -#define lpfc_function_mode_fcoe_t_MASK 0x00000001 -#define lpfc_function_mode_fcoe_t_WORD function_mode -#define lpfc_function_mode_dal_SHIFT 8 -#define lpfc_function_mode_dal_MASK 0x00000001 -#define lpfc_function_mode_dal_WORD function_mode -#define lpfc_function_mode_lro_SHIFT 9 -#define lpfc_function_mode_lro_MASK 0x00000001 -#define lpfc_function_mode_lro_WORD function_mode -#define lpfc_function_mode_flex10_SHIFT 10 -#define lpfc_function_mode_flex10_MASK 0x00000001 -#define lpfc_function_mode_flex10_WORD function_mode -#define lpfc_function_mode_ncsi_SHIFT 11 -#define lpfc_function_mode_ncsi_MASK 0x00000001 -#define lpfc_function_mode_ncsi_WORD function_mode -}; - /* Status field for embedded SLI_CONFIG mailbox command */ #define STATUS_SUCCESS 0x0 #define STATUS_FAILED 0x1 @@ -2965,7 +3003,7 @@ struct lpfc_mqe { struct lpfc_mbx_read_config rd_config; struct lpfc_mbx_request_features req_ftrs; struct lpfc_mbx_post_hdr_tmpl hdr_tmpl; - struct lpfc_mbx_query_fw_cfg query_fw_cfg; + struct lpfc_mbx_query_fw_config query_fw_cfg; struct lpfc_mbx_supp_pages supp_pages; struct lpfc_mbx_pc_sli4_params sli4_params; struct lpfc_mbx_get_sli4_parameters get_sli4_parameters; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 7de4ef14698..314b4f61b9e 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -6229,9 +6229,11 @@ lpfc_sli4_bar0_register_memmap(struct lpfc_hba *phba, uint32_t if_type) phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PORT_SEM_OFFSET; phba->sli4_hba.RQDBregaddr = - phba->sli4_hba.conf_regs_memmap_p + LPFC_RQ_DOORBELL; + phba->sli4_hba.conf_regs_memmap_p + + LPFC_ULP0_RQ_DOORBELL; phba->sli4_hba.WQDBregaddr = - phba->sli4_hba.conf_regs_memmap_p + LPFC_WQ_DOORBELL; + phba->sli4_hba.conf_regs_memmap_p + + LPFC_ULP0_WQ_DOORBELL; phba->sli4_hba.EQCQDBregaddr = phba->sli4_hba.conf_regs_memmap_p + LPFC_EQCQ_DOORBELL; phba->sli4_hba.MQDBregaddr = @@ -6285,9 +6287,11 @@ lpfc_sli4_bar2_register_memmap(struct lpfc_hba *phba, uint32_t vf) return -ENODEV; phba->sli4_hba.RQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + - vf * LPFC_VFR_PAGE_SIZE + LPFC_RQ_DOORBELL); + vf * LPFC_VFR_PAGE_SIZE + + LPFC_ULP0_RQ_DOORBELL); phba->sli4_hba.WQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + - vf * LPFC_VFR_PAGE_SIZE + LPFC_WQ_DOORBELL); + vf * LPFC_VFR_PAGE_SIZE + + LPFC_ULP0_WQ_DOORBELL); phba->sli4_hba.EQCQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + vf * LPFC_VFR_PAGE_SIZE + LPFC_EQCQ_DOORBELL); phba->sli4_hba.MQDBregaddr = (phba->sli4_hba.drbl_regs_memmap_p + @@ -6983,6 +6987,19 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) phba->sli4_hba.fcp_wq = NULL; } + if (phba->pci_bar0_memmap_p) { + iounmap(phba->pci_bar0_memmap_p); + phba->pci_bar0_memmap_p = NULL; + } + if (phba->pci_bar2_memmap_p) { + iounmap(phba->pci_bar2_memmap_p); + phba->pci_bar2_memmap_p = NULL; + } + if (phba->pci_bar4_memmap_p) { + iounmap(phba->pci_bar4_memmap_p); + phba->pci_bar4_memmap_p = NULL; + } + /* Release FCP CQ mapping array */ if (phba->sli4_hba.fcp_cq_map != NULL) { kfree(phba->sli4_hba.fcp_cq_map); @@ -7046,6 +7063,53 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) int rc = -ENOMEM; int fcp_eqidx, fcp_cqidx, fcp_wqidx; int fcp_cq_index = 0; + uint32_t shdr_status, shdr_add_status; + union lpfc_sli4_cfg_shdr *shdr; + LPFC_MBOXQ_t *mboxq; + uint32_t length; + + /* Check for dual-ULP support */ + mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); + if (!mboxq) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3249 Unable to allocate memory for " + "QUERY_FW_CFG mailbox command\n"); + return -ENOMEM; + } + length = (sizeof(struct lpfc_mbx_query_fw_config) - + sizeof(struct lpfc_sli4_cfg_mhdr)); + lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON, + LPFC_MBOX_OPCODE_QUERY_FW_CFG, + length, LPFC_SLI4_MBX_EMBED); + + rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); + + shdr = (union lpfc_sli4_cfg_shdr *) + &mboxq->u.mqe.un.sli4_config.header.cfg_shdr; + shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); + shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, &shdr->response); + if (shdr_status || shdr_add_status || rc) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3250 QUERY_FW_CFG mailbox failed with status " + "x%x add_status x%x, mbx status x%x\n", + shdr_status, shdr_add_status, rc); + if (rc != MBX_TIMEOUT) + mempool_free(mboxq, phba->mbox_mem_pool); + rc = -ENXIO; + goto out_error; + } + + phba->sli4_hba.fw_func_mode = + mboxq->u.mqe.un.query_fw_cfg.rsp.function_mode; + phba->sli4_hba.ulp0_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp0_mode; + phba->sli4_hba.ulp1_mode = mboxq->u.mqe.un.query_fw_cfg.rsp.ulp1_mode; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3251 QUERY_FW_CFG: func_mode:x%x, ulp0_mode:x%x, " + "ulp1_mode:x%x\n", phba->sli4_hba.fw_func_mode, + phba->sli4_hba.ulp0_mode, phba->sli4_hba.ulp1_mode); + + if (rc != MBX_TIMEOUT) + mempool_free(mboxq, phba->mbox_mem_pool); /* * Set up HBA Event Queues (EQs) @@ -7660,78 +7724,6 @@ out: } /** - * lpfc_sli4_send_nop_mbox_cmds - Send sli-4 nop mailbox commands - * @phba: pointer to lpfc hba data structure. - * @cnt: number of nop mailbox commands to send. - * - * This routine is invoked to send a number @cnt of NOP mailbox command and - * wait for each command to complete. - * - * Return: the number of NOP mailbox command completed. - **/ -static int -lpfc_sli4_send_nop_mbox_cmds(struct lpfc_hba *phba, uint32_t cnt) -{ - LPFC_MBOXQ_t *mboxq; - int length, cmdsent; - uint32_t mbox_tmo; - uint32_t rc = 0; - uint32_t shdr_status, shdr_add_status; - union lpfc_sli4_cfg_shdr *shdr; - - if (cnt == 0) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, - "2518 Requested to send 0 NOP mailbox cmd\n"); - return cnt; - } - - mboxq = (LPFC_MBOXQ_t *)mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL); - if (!mboxq) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "2519 Unable to allocate memory for issuing " - "NOP mailbox command\n"); - return 0; - } - - /* Set up NOP SLI4_CONFIG mailbox-ioctl command */ - length = (sizeof(struct lpfc_mbx_nop) - - sizeof(struct lpfc_sli4_cfg_mhdr)); - - for (cmdsent = 0; cmdsent < cnt; cmdsent++) { - lpfc_sli4_config(phba, mboxq, LPFC_MBOX_SUBSYSTEM_COMMON, - LPFC_MBOX_OPCODE_NOP, length, - LPFC_SLI4_MBX_EMBED); - if (!phba->sli4_hba.intr_enable) - rc = lpfc_sli_issue_mbox(phba, mboxq, MBX_POLL); - else { - mbox_tmo = lpfc_mbox_tmo_val(phba, mboxq); - rc = lpfc_sli_issue_mbox_wait(phba, mboxq, mbox_tmo); - } - if (rc == MBX_TIMEOUT) - break; - /* Check return status */ - shdr = (union lpfc_sli4_cfg_shdr *) - &mboxq->u.mqe.un.sli4_config.header.cfg_shdr; - shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); - shdr_add_status = bf_get(lpfc_mbox_hdr_add_status, - &shdr->response); - if (shdr_status || shdr_add_status || rc) { - lpfc_printf_log(phba, KERN_WARNING, LOG_INIT, - "2520 NOP mailbox command failed " - "status x%x add_status x%x mbx " - "status x%x\n", shdr_status, - shdr_add_status, rc); - break; - } - } - - if (rc != MBX_TIMEOUT) - mempool_free(mboxq, phba->mbox_mem_pool); - - return cmdsent; -} - -/** * lpfc_sli4_pci_mem_setup - Setup SLI4 HBA PCI memory space. * @phba: pointer to lpfc hba data structure. * @@ -8499,37 +8491,6 @@ lpfc_unset_hba(struct lpfc_hba *phba) } /** - * lpfc_sli4_unset_hba - Unset SLI4 hba device initialization. - * @phba: pointer to lpfc hba data structure. - * - * This routine is invoked to unset the HBA device initialization steps to - * a device with SLI-4 interface spec. - **/ -static void -lpfc_sli4_unset_hba(struct lpfc_hba *phba) -{ - struct lpfc_vport *vport = phba->pport; - struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - - spin_lock_irq(shost->host_lock); - vport->load_flag |= FC_UNLOADING; - spin_unlock_irq(shost->host_lock); - - phba->pport->work_port_events = 0; - - /* Stop the SLI4 device port */ - lpfc_stop_port(phba); - - lpfc_sli4_disable_intr(phba); - - /* Reset SLI4 HBA FCoE function */ - lpfc_pci_function_reset(phba); - lpfc_sli4_queue_destroy(phba); - - return; -} - -/** * lpfc_sli4_xri_exchange_busy_wait - Wait for device XRI exchange busy * @phba: Pointer to HBA context object. * @@ -9591,7 +9552,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct Scsi_Host *shost = NULL; int error, ret; uint32_t cfg_mode, intr_mode; - int mcnt; int adjusted_fcp_io_channel; /* Allocate memory for HBA structure */ @@ -9680,58 +9640,35 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) shost = lpfc_shost_from_vport(vport); /* save shost for error cleanup */ /* Now, trying to enable interrupt and bring up the device */ cfg_mode = phba->cfg_use_msi; - while (true) { - /* Put device to a known state before enabling interrupt */ - lpfc_stop_port(phba); - /* Configure and enable interrupt */ - intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); - if (intr_mode == LPFC_INTR_ERROR) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "0426 Failed to enable interrupt.\n"); - error = -ENODEV; - goto out_free_sysfs_attr; - } - /* Default to single EQ for non-MSI-X */ - if (phba->intr_type != MSIX) - adjusted_fcp_io_channel = 1; - else - adjusted_fcp_io_channel = phba->cfg_fcp_io_channel; - phba->cfg_fcp_io_channel = adjusted_fcp_io_channel; - /* Set up SLI-4 HBA */ - if (lpfc_sli4_hba_setup(phba)) { - lpfc_printf_log(phba, KERN_ERR, LOG_INIT, - "1421 Failed to set up hba\n"); - error = -ENODEV; - goto out_disable_intr; - } - /* Send NOP mbx cmds for non-INTx mode active interrupt test */ - if (intr_mode != 0) - mcnt = lpfc_sli4_send_nop_mbox_cmds(phba, - LPFC_ACT_INTR_CNT); - - /* Check active interrupts received only for MSI/MSI-X */ - if (intr_mode == 0 || - phba->sli.slistat.sli_intr >= LPFC_ACT_INTR_CNT) { - /* Log the current active interrupt mode */ - phba->intr_mode = intr_mode; - lpfc_log_intr_mode(phba, intr_mode); - break; - } - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "0451 Configure interrupt mode (%d) " - "failed active interrupt test.\n", - intr_mode); - /* Unset the previous SLI-4 HBA setup. */ - /* - * TODO: Is this operation compatible with IF TYPE 2 - * devices? All port state is deleted and cleared. - */ - lpfc_sli4_unset_hba(phba); - /* Try next level of interrupt mode */ - cfg_mode = --intr_mode; + /* Put device to a known state before enabling interrupt */ + lpfc_stop_port(phba); + /* Configure and enable interrupt */ + intr_mode = lpfc_sli4_enable_intr(phba, cfg_mode); + if (intr_mode == LPFC_INTR_ERROR) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0426 Failed to enable interrupt.\n"); + error = -ENODEV; + goto out_free_sysfs_attr; + } + /* Default to single EQ for non-MSI-X */ + if (phba->intr_type != MSIX) + adjusted_fcp_io_channel = 1; + else + adjusted_fcp_io_channel = phba->cfg_fcp_io_channel; + phba->cfg_fcp_io_channel = adjusted_fcp_io_channel; + /* Set up SLI-4 HBA */ + if (lpfc_sli4_hba_setup(phba)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "1421 Failed to set up hba\n"); + error = -ENODEV; + goto out_disable_intr; } + /* Log the current active interrupt mode */ + phba->intr_mode = intr_mode; + lpfc_log_intr_mode(phba, intr_mode); + /* Perform post initialization setup */ lpfc_post_init_setup(phba); diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index d8fadcb2db7..46128c67920 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -1115,6 +1115,13 @@ out: "0261 Cannot Register NameServer login\n"); } + /* + ** In case the node reference counter does not go to zero, ensure that + ** the stale state for the node is not processed. + */ + + ndlp->nlp_prev_state = ndlp->nlp_state; + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DEFER_RM; spin_unlock_irq(shost->host_lock); @@ -2159,13 +2166,16 @@ lpfc_cmpl_plogi_npr_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, { struct lpfc_iocbq *cmdiocb, *rspiocb; IOCB_t *irsp; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); cmdiocb = (struct lpfc_iocbq *) arg; rspiocb = cmdiocb->context_un.rsp_iocb; irsp = &rspiocb->iocb; if (irsp->ulpStatus) { + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_DEFER_RM; + spin_unlock_irq(shost->host_lock); return NLP_STE_FREED_NODE; } return ndlp->nlp_state; diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index 60e5a177644..98af07c6e30 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -288,6 +288,26 @@ lpfc_change_queue_depth(struct scsi_device *sdev, int qdepth, int reason) } /** + * lpfc_change_queue_type() - Change a device's scsi tag queuing type + * @sdev: Pointer the scsi device whose queue depth is to change + * @tag_type: Identifier for queue tag type + */ +static int +lpfc_change_queue_type(struct scsi_device *sdev, int tag_type) +{ + if (sdev->tagged_supported) { + scsi_set_tag_type(sdev, tag_type); + if (tag_type) + scsi_activate_tcq(sdev, sdev->queue_depth); + else + scsi_deactivate_tcq(sdev, sdev->queue_depth); + } else + tag_type = 0; + + return tag_type; +} + +/** * lpfc_rampdown_queue_depth - Post RAMP_DOWN_QUEUE event to worker thread * @phba: The Hba for which this call is being executed. * @@ -3972,7 +3992,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd, break; } } else - fcp_cmnd->fcpCntl1 = 0; + fcp_cmnd->fcpCntl1 = SIMPLE_Q; sli4 = (phba->sli_rev == LPFC_SLI_REV4); @@ -5150,6 +5170,7 @@ struct scsi_host_template lpfc_template = { .max_sectors = 0xFFFF, .vendor_id = LPFC_NL_VENDOR_ID, .change_queue_depth = lpfc_change_queue_depth, + .change_queue_type = lpfc_change_queue_type, }; struct scsi_host_template lpfc_vport_template = { @@ -5172,4 +5193,5 @@ struct scsi_host_template lpfc_vport_template = { .shost_attrs = lpfc_vport_attrs, .max_sectors = 0xFFFF, .change_queue_depth = lpfc_change_queue_depth, + .change_queue_type = lpfc_change_queue_type, }; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 624eab37039..55b6fc83ad7 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -124,10 +124,17 @@ lpfc_sli4_wq_put(struct lpfc_queue *q, union lpfc_wqe *wqe) /* Ring Doorbell */ doorbell.word0 = 0; - bf_set(lpfc_wq_doorbell_num_posted, &doorbell, 1); - bf_set(lpfc_wq_doorbell_index, &doorbell, host_index); - bf_set(lpfc_wq_doorbell_id, &doorbell, q->queue_id); - writel(doorbell.word0, q->phba->sli4_hba.WQDBregaddr); + if (q->db_format == LPFC_DB_LIST_FORMAT) { + bf_set(lpfc_wq_db_list_fm_num_posted, &doorbell, 1); + bf_set(lpfc_wq_db_list_fm_index, &doorbell, host_index); + bf_set(lpfc_wq_db_list_fm_id, &doorbell, q->queue_id); + } else if (q->db_format == LPFC_DB_RING_FORMAT) { + bf_set(lpfc_wq_db_ring_fm_num_posted, &doorbell, 1); + bf_set(lpfc_wq_db_ring_fm_id, &doorbell, q->queue_id); + } else { + return -EINVAL; + } + writel(doorbell.word0, q->db_regaddr); return 0; } @@ -456,10 +463,20 @@ lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, /* Ring The Header Receive Queue Doorbell */ if (!(hq->host_index % hq->entry_repost)) { doorbell.word0 = 0; - bf_set(lpfc_rq_doorbell_num_posted, &doorbell, - hq->entry_repost); - bf_set(lpfc_rq_doorbell_id, &doorbell, hq->queue_id); - writel(doorbell.word0, hq->phba->sli4_hba.RQDBregaddr); + if (hq->db_format == LPFC_DB_RING_FORMAT) { + bf_set(lpfc_rq_db_ring_fm_num_posted, &doorbell, + hq->entry_repost); + bf_set(lpfc_rq_db_ring_fm_id, &doorbell, hq->queue_id); + } else if (hq->db_format == LPFC_DB_LIST_FORMAT) { + bf_set(lpfc_rq_db_list_fm_num_posted, &doorbell, + hq->entry_repost); + bf_set(lpfc_rq_db_list_fm_index, &doorbell, + hq->host_index); + bf_set(lpfc_rq_db_list_fm_id, &doorbell, hq->queue_id); + } else { + return -EINVAL; + } + writel(doorbell.word0, hq->db_regaddr); } return put_index; } @@ -4939,7 +4956,7 @@ out_free_mboxq: static void lpfc_sli4_arm_cqeq_intr(struct lpfc_hba *phba) { - uint8_t fcp_eqidx; + int fcp_eqidx; lpfc_sli4_cq_release(phba->sli4_hba.mbx_cq, LPFC_QUEUE_REARM); lpfc_sli4_cq_release(phba->sli4_hba.els_cq, LPFC_QUEUE_REARM); @@ -5622,6 +5639,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) } /* RPIs. */ count = phba->sli4_hba.max_cfg_param.max_rpi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3279 Invalid provisioning of " + "rpi:%d\n", count); + rc = -EINVAL; + goto err_exit; + } base = phba->sli4_hba.max_cfg_param.rpi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.rpi_bmask = kzalloc(longs * @@ -5644,6 +5668,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* VPIs. */ count = phba->sli4_hba.max_cfg_param.max_vpi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3280 Invalid provisioning of " + "vpi:%d\n", count); + rc = -EINVAL; + goto free_rpi_ids; + } base = phba->sli4_hba.max_cfg_param.vpi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->vpi_bmask = kzalloc(longs * @@ -5666,6 +5697,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* XRIs. */ count = phba->sli4_hba.max_cfg_param.max_xri; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3281 Invalid provisioning of " + "xri:%d\n", count); + rc = -EINVAL; + goto free_vpi_ids; + } base = phba->sli4_hba.max_cfg_param.xri_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.xri_bmask = kzalloc(longs * @@ -5689,6 +5727,13 @@ lpfc_sli4_alloc_resource_identifiers(struct lpfc_hba *phba) /* VFIs. */ count = phba->sli4_hba.max_cfg_param.max_vfi; + if (count <= 0) { + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, + "3282 Invalid provisioning of " + "vfi:%d\n", count); + rc = -EINVAL; + goto free_xri_ids; + } base = phba->sli4_hba.max_cfg_param.vfi_base; longs = (count + BITS_PER_LONG - 1) / BITS_PER_LONG; phba->sli4_hba.vfi_bmask = kzalloc(longs * @@ -8370,7 +8415,7 @@ __lpfc_sli_issue_iocb_s4(struct lpfc_hba *phba, uint32_t ring_number, * This is a continuation of a commandi,(CX) so this * sglq is on the active list */ - sglq = __lpfc_get_active_sglq(phba, piocb->sli4_xritag); + sglq = __lpfc_get_active_sglq(phba, piocb->sli4_lxritag); if (!sglq) return IOCB_ERROR; } @@ -8855,12 +8900,6 @@ lpfc_sli_setup(struct lpfc_hba *phba) pring->prt[3].type = FC_TYPE_CT; pring->prt[3].lpfc_sli_rcv_unsol_event = lpfc_ct_unsol_event; - /* abort unsolicited sequence */ - pring->prt[4].profile = 0; /* Mask 4 */ - pring->prt[4].rctl = FC_RCTL_BA_ABTS; - pring->prt[4].type = FC_TYPE_BLS; - pring->prt[4].lpfc_sli_rcv_unsol_event = - lpfc_sli4_ct_abort_unsol_event; break; } totiocbsize += (pring->sli.sli3.numCiocb * @@ -11873,7 +11912,7 @@ lpfc_sli4_hba_intr_handler(int irq, void *dev_id) struct lpfc_eqe *eqe; unsigned long iflag; int ecount = 0; - uint32_t fcp_eqidx; + int fcp_eqidx; /* Get the driver's phba structure from the dev_id */ fcp_eq_hdl = (struct lpfc_fcp_eq_hdl *)dev_id; @@ -11975,7 +12014,7 @@ lpfc_sli4_intr_handler(int irq, void *dev_id) struct lpfc_hba *phba; irqreturn_t hba_irq_rc; bool hba_handled = false; - uint32_t fcp_eqidx; + int fcp_eqidx; /* Get the driver's phba structure from the dev_id */ phba = (struct lpfc_hba *)dev_id; @@ -12097,6 +12136,54 @@ out_fail: } /** + * lpfc_dual_chute_pci_bar_map - Map pci base address register to host memory + * @phba: HBA structure that indicates port to create a queue on. + * @pci_barset: PCI BAR set flag. + * + * This function shall perform iomap of the specified PCI BAR address to host + * memory address if not already done so and return it. The returned host + * memory address can be NULL. + */ +static void __iomem * +lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset) +{ + struct pci_dev *pdev; + unsigned long bar_map, bar_map_len; + + if (!phba->pcidev) + return NULL; + else + pdev = phba->pcidev; + + switch (pci_barset) { + case WQ_PCI_BAR_0_AND_1: + if (!phba->pci_bar0_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR0); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR0); + phba->pci_bar0_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar0_memmap_p; + case WQ_PCI_BAR_2_AND_3: + if (!phba->pci_bar2_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR2); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR2); + phba->pci_bar2_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar2_memmap_p; + case WQ_PCI_BAR_4_AND_5: + if (!phba->pci_bar4_memmap_p) { + bar_map = pci_resource_start(pdev, PCI_64BIT_BAR4); + bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR4); + phba->pci_bar4_memmap_p = ioremap(bar_map, bar_map_len); + } + return phba->pci_bar4_memmap_p; + default: + break; + } + return NULL; +} + +/** * lpfc_modify_fcp_eq_delay - Modify Delay Multiplier on FCP EQs * @phba: HBA structure that indicates port to create a queue on. * @startq: The starting FCP EQ to modify @@ -12673,6 +12760,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, union lpfc_sli4_cfg_shdr *shdr; uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; struct dma_address *page; + void __iomem *bar_memmap_p; + uint32_t db_offset; + uint16_t pci_barset; /* sanity check on queue memory */ if (!wq || !cq) @@ -12696,6 +12786,7 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, cq->queue_id); bf_set(lpfc_mbox_hdr_version, &shdr->request, phba->sli4_hba.pc_sli4_params.wqv); + if (phba->sli4_hba.pc_sli4_params.wqv == LPFC_Q_CREATE_VERSION_1) { bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1, wq->entry_count); @@ -12723,6 +12814,10 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, page[dmabuf->buffer_tag].addr_lo = putPaddrLow(dmabuf->phys); page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_wq_create_dua, &wq_create->u.request, 1); + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); @@ -12740,6 +12835,47 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, status = -ENXIO; goto out; } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) { + wq->db_format = bf_get(lpfc_mbx_wq_create_db_format, + &wq_create->u.response); + if ((wq->db_format != LPFC_DB_LIST_FORMAT) && + (wq->db_format != LPFC_DB_RING_FORMAT)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3265 WQ[%d] doorbell format not " + "supported: x%x\n", wq->queue_id, + wq->db_format); + status = -EINVAL; + goto out; + } + pci_barset = bf_get(lpfc_mbx_wq_create_bar_set, + &wq_create->u.response); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3263 WQ[%d] failed to memmap pci " + "barset:x%x\n", wq->queue_id, + pci_barset); + status = -ENOMEM; + goto out; + } + db_offset = wq_create->u.response.doorbell_offset; + if ((db_offset != LPFC_ULP0_WQ_DOORBELL) && + (db_offset != LPFC_ULP1_WQ_DOORBELL)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3252 WQ[%d] doorbell offset not " + "supported: x%x\n", wq->queue_id, + db_offset); + status = -EINVAL; + goto out; + } + wq->db_regaddr = bar_memmap_p + db_offset; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3264 WQ[%d]: barset:x%x, offset:x%x\n", + wq->queue_id, pci_barset, db_offset); + } else { + wq->db_format = LPFC_DB_LIST_FORMAT; + wq->db_regaddr = phba->sli4_hba.WQDBregaddr; + } wq->type = LPFC_WQ; wq->assoc_qid = cq->queue_id; wq->subtype = subtype; @@ -12816,6 +12952,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, uint32_t shdr_status, shdr_add_status; union lpfc_sli4_cfg_shdr *shdr; uint32_t hw_page_size = phba->sli4_hba.pc_sli4_params.if_page_sz; + void __iomem *bar_memmap_p; + uint32_t db_offset; + uint16_t pci_barset; /* sanity check on queue memory */ if (!hrq || !drq || !cq) @@ -12894,6 +13033,9 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, rq_create->u.request.page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1); + rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr_status = bf_get(lpfc_mbox_hdr_status, &shdr->response); @@ -12911,6 +13053,50 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, status = -ENXIO; goto out; } + + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) { + hrq->db_format = bf_get(lpfc_mbx_rq_create_db_format, + &rq_create->u.response); + if ((hrq->db_format != LPFC_DB_LIST_FORMAT) && + (hrq->db_format != LPFC_DB_RING_FORMAT)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3262 RQ [%d] doorbell format not " + "supported: x%x\n", hrq->queue_id, + hrq->db_format); + status = -EINVAL; + goto out; + } + + pci_barset = bf_get(lpfc_mbx_rq_create_bar_set, + &rq_create->u.response); + bar_memmap_p = lpfc_dual_chute_pci_bar_map(phba, pci_barset); + if (!bar_memmap_p) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3269 RQ[%d] failed to memmap pci " + "barset:x%x\n", hrq->queue_id, + pci_barset); + status = -ENOMEM; + goto out; + } + + db_offset = rq_create->u.response.doorbell_offset; + if ((db_offset != LPFC_ULP0_RQ_DOORBELL) && + (db_offset != LPFC_ULP1_RQ_DOORBELL)) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "3270 RQ[%d] doorbell offset not " + "supported: x%x\n", hrq->queue_id, + db_offset); + status = -EINVAL; + goto out; + } + hrq->db_regaddr = bar_memmap_p + db_offset; + lpfc_printf_log(phba, KERN_INFO, LOG_INIT, + "3266 RQ[qid:%d]: barset:x%x, offset:x%x\n", + hrq->queue_id, pci_barset, db_offset); + } else { + hrq->db_format = LPFC_DB_RING_FORMAT; + hrq->db_regaddr = phba->sli4_hba.RQDBregaddr; + } hrq->type = LPFC_HRQ; hrq->assoc_qid = cq->queue_id; hrq->subtype = subtype; @@ -12976,6 +13162,8 @@ lpfc_rq_create(struct lpfc_hba *phba, struct lpfc_queue *hrq, rq_create->u.request.page[dmabuf->buffer_tag].addr_hi = putPaddrHigh(dmabuf->phys); } + if (phba->sli4_hba.fw_func_mode & LPFC_DUA_MODE) + bf_set(lpfc_mbx_rq_create_dua, &rq_create->u.request, 1); rc = lpfc_sli_issue_mbox(phba, mbox, MBX_POLL); /* The IOCTL status is embedded in the mailbox subheader. */ shdr = (union lpfc_sli4_cfg_shdr *) &rq_create->header.cfg_shdr; @@ -14063,6 +14251,40 @@ lpfc_sli4_abort_partial_seq(struct lpfc_vport *vport, } /** + * lpfc_sli4_abort_ulp_seq - Abort assembled unsol sequence from ulp + * @vport: pointer to a vitural port + * @dmabuf: pointer to a dmabuf that describes the FC sequence + * + * This function tries to abort from the assembed sequence from upper level + * protocol, described by the information from basic abbort @dmabuf. It + * checks to see whether such pending context exists at upper level protocol. + * If so, it shall clean up the pending context. + * + * Return + * true -- if there is matching pending context of the sequence cleaned + * at ulp; + * false -- if there is no matching pending context of the sequence present + * at ulp. + **/ +static bool +lpfc_sli4_abort_ulp_seq(struct lpfc_vport *vport, struct hbq_dmabuf *dmabuf) +{ + struct lpfc_hba *phba = vport->phba; + int handled; + + /* Accepting abort at ulp with SLI4 only */ + if (phba->sli_rev < LPFC_SLI_REV4) + return false; + + /* Register all caring upper level protocols to attend abort */ + handled = lpfc_ct_handle_unsol_abort(phba, dmabuf); + if (handled) + return true; + + return false; +} + +/** * lpfc_sli4_seq_abort_rsp_cmpl - BLS ABORT RSP seq abort iocb complete handler * @phba: Pointer to HBA context object. * @cmd_iocbq: pointer to the command iocbq structure. @@ -14077,8 +14299,14 @@ lpfc_sli4_seq_abort_rsp_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmd_iocbq, struct lpfc_iocbq *rsp_iocbq) { - if (cmd_iocbq) + struct lpfc_nodelist *ndlp; + + if (cmd_iocbq) { + ndlp = (struct lpfc_nodelist *)cmd_iocbq->context1; + lpfc_nlp_put(ndlp); + lpfc_nlp_not_used(ndlp); lpfc_sli_release_iocbq(phba, cmd_iocbq); + } /* Failure means BLS ABORT RSP did not get delivered to remote node*/ if (rsp_iocbq && rsp_iocbq->iocb.ulpStatus) @@ -14118,9 +14346,10 @@ lpfc_sli4_xri_inrange(struct lpfc_hba *phba, * event after aborting the sequence handling. **/ static void -lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, - struct fc_frame_header *fc_hdr) +lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr, bool aborted) { + struct lpfc_hba *phba = vport->phba; struct lpfc_iocbq *ctiocb = NULL; struct lpfc_nodelist *ndlp; uint16_t oxid, rxid, xri, lxri; @@ -14135,12 +14364,27 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, oxid = be16_to_cpu(fc_hdr->fh_ox_id); rxid = be16_to_cpu(fc_hdr->fh_rx_id); - ndlp = lpfc_findnode_did(phba->pport, sid); + ndlp = lpfc_findnode_did(vport, sid); if (!ndlp) { - lpfc_printf_log(phba, KERN_WARNING, LOG_ELS, - "1268 Find ndlp returned NULL for oxid:x%x " - "SID:x%x\n", oxid, sid); - return; + ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "1268 Failed to allocate ndlp for " + "oxid:x%x SID:x%x\n", oxid, sid); + return; + } + lpfc_nlp_init(vport, ndlp, sid); + /* Put ndlp onto pport node list */ + lpfc_enqueue_node(vport, ndlp); + } else if (!NLP_CHK_NODE_ACT(ndlp)) { + /* re-setup ndlp without removing from node list */ + ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); + if (!ndlp) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "3275 Failed to active ndlp found " + "for oxid:x%x SID:x%x\n", oxid, sid); + return; + } } /* Allocate buffer for rsp iocb */ @@ -14164,7 +14408,7 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, icmd->ulpLe = 1; icmd->ulpClass = CLASS3; icmd->ulpContext = phba->sli4_hba.rpi_ids[ndlp->nlp_rpi]; - ctiocb->context1 = ndlp; + ctiocb->context1 = lpfc_nlp_get(ndlp); ctiocb->iocb_cmpl = NULL; ctiocb->vport = phba->pport; @@ -14183,14 +14427,24 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, if (lxri != NO_XRI) lpfc_set_rrq_active(phba, ndlp, lxri, (xri == oxid) ? rxid : oxid, 0); - /* If the oxid maps to the FCP XRI range or if it is out of range, - * send a BLS_RJT. The driver no longer has that exchange. - * Override the IOCB for a BA_RJT. + /* For BA_ABTS from exchange responder, if the logical xri with + * the oxid maps to the FCP XRI range, the port no longer has + * that exchange context, send a BLS_RJT. Override the IOCB for + * a BA_RJT. */ - if (xri > (phba->sli4_hba.max_cfg_param.max_xri + - phba->sli4_hba.max_cfg_param.xri_base) || - xri > (lpfc_sli4_get_els_iocb_cnt(phba) + - phba->sli4_hba.max_cfg_param.xri_base)) { + if ((fctl & FC_FC_EX_CTX) && + (lxri > lpfc_sli4_get_els_iocb_cnt(phba))) { + icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT; + bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0); + bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID); + bf_set(lpfc_rsn_code, &icmd->un.bls_rsp, FC_BA_RJT_UNABLE); + } + + /* If BA_ABTS failed to abort a partially assembled receive sequence, + * the driver no longer has that exchange, send a BLS_RJT. Override + * the IOCB for a BA_RJT. + */ + if (aborted == false) { icmd->un.xseq64.w5.hcsw.Rctl = FC_RCTL_BA_RJT; bf_set(lpfc_vndr_code, &icmd->un.bls_rsp, 0); bf_set(lpfc_rsn_expln, &icmd->un.bls_rsp, FC_BA_RJT_INV_XID); @@ -14214,17 +14468,19 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_hba *phba, bf_set(lpfc_abts_oxid, &icmd->un.bls_rsp, oxid); /* Xmit CT abts response on exchange <xid> */ - lpfc_printf_log(phba, KERN_INFO, LOG_ELS, - "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n", - icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state); + lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, + "1200 Send BLS cmd x%x on oxid x%x Data: x%x\n", + icmd->un.xseq64.w5.hcsw.Rctl, oxid, phba->link_state); rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, ctiocb, 0); if (rc == IOCB_ERROR) { - lpfc_printf_log(phba, KERN_ERR, LOG_ELS, - "2925 Failed to issue CT ABTS RSP x%x on " - "xri x%x, Data x%x\n", - icmd->un.xseq64.w5.hcsw.Rctl, oxid, - phba->link_state); + lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + "2925 Failed to issue CT ABTS RSP x%x on " + "xri x%x, Data x%x\n", + icmd->un.xseq64.w5.hcsw.Rctl, oxid, + phba->link_state); + lpfc_nlp_put(ndlp); + ctiocb->context1 = NULL; lpfc_sli_release_iocbq(phba, ctiocb); } } @@ -14249,32 +14505,25 @@ lpfc_sli4_handle_unsol_abort(struct lpfc_vport *vport, struct lpfc_hba *phba = vport->phba; struct fc_frame_header fc_hdr; uint32_t fctl; - bool abts_par; + bool aborted; /* Make a copy of fc_hdr before the dmabuf being released */ memcpy(&fc_hdr, dmabuf->hbuf.virt, sizeof(struct fc_frame_header)); fctl = sli4_fctl_from_fc_hdr(&fc_hdr); if (fctl & FC_FC_EX_CTX) { - /* - * ABTS sent by responder to exchange, just free the buffer - */ - lpfc_in_buf_free(phba, &dmabuf->dbuf); + /* ABTS by responder to exchange, no cleanup needed */ + aborted = true; } else { - /* - * ABTS sent by initiator to exchange, need to do cleanup - */ - /* Try to abort partially assembled seq */ - abts_par = lpfc_sli4_abort_partial_seq(vport, dmabuf); - - /* Send abort to ULP if partially seq abort failed */ - if (abts_par == false) - lpfc_sli4_send_seq_to_ulp(vport, dmabuf); - else - lpfc_in_buf_free(phba, &dmabuf->dbuf); + /* ABTS by initiator to exchange, need to do cleanup */ + aborted = lpfc_sli4_abort_partial_seq(vport, dmabuf); + if (aborted == false) + aborted = lpfc_sli4_abort_ulp_seq(vport, dmabuf); } - /* Send basic accept (BA_ACC) to the abort requester */ - lpfc_sli4_seq_abort_rsp(phba, &fc_hdr); + lpfc_in_buf_free(phba, &dmabuf->dbuf); + + /* Respond with BA_ACC or BA_RJT accordingly */ + lpfc_sli4_seq_abort_rsp(vport, &fc_hdr, aborted); } /** @@ -15307,10 +15556,13 @@ lpfc_sli4_fcf_rr_next_index_get(struct lpfc_hba *phba) { uint16_t next_fcf_index; +initial_priority: /* Search start from next bit of currently registered FCF index */ + next_fcf_index = phba->fcf.current_rec.fcf_indx; + next_priority: - next_fcf_index = (phba->fcf.current_rec.fcf_indx + 1) % - LPFC_SLI4_FCF_TBL_INDX_MAX; + /* Determine the next fcf index to check */ + next_fcf_index = (next_fcf_index + 1) % LPFC_SLI4_FCF_TBL_INDX_MAX; next_fcf_index = find_next_bit(phba->fcf.fcf_rr_bmask, LPFC_SLI4_FCF_TBL_INDX_MAX, next_fcf_index); @@ -15337,7 +15589,7 @@ next_priority: * at that level and continue the selection process. */ if (lpfc_check_next_fcf_pri_level(phba)) - goto next_priority; + goto initial_priority; lpfc_printf_log(phba, KERN_WARNING, LOG_FIP, "2844 No roundrobin failover FCF available\n"); if (next_fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX) diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 44c427a45d6..be02b59ea27 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -139,6 +139,10 @@ struct lpfc_queue { struct lpfc_sli_ring *pring; /* ptr to io ring associated with q */ + uint16_t db_format; +#define LPFC_DB_RING_FORMAT 0x01 +#define LPFC_DB_LIST_FORMAT 0x02 + void __iomem *db_regaddr; /* For q stats */ uint32_t q_cnt_1; uint32_t q_cnt_2; @@ -508,6 +512,10 @@ struct lpfc_sli4_hba { struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */ struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */ + uint8_t fw_func_mode; /* FW function protocol mode */ + uint32_t ulp0_mode; /* ULP0 protocol mode */ + uint32_t ulp1_mode; /* ULP1 protocol mode */ + /* Setup information for various queue parameters */ int eq_esize; int eq_ecount; diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index ba596e854bb..f3b7795a296 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -18,7 +18,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "8.3.36" +#define LPFC_DRIVER_VERSION "8.3.37" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.c b/drivers/scsi/mpt2sas/mpt2sas_base.c index ffd85c511c8..5e24e7e7371 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.c +++ b/drivers/scsi/mpt2sas/mpt2sas_base.c @@ -155,7 +155,7 @@ _base_fault_reset_work(struct work_struct *work) struct task_struct *p; spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags); - if (ioc->shost_recovery) + if (ioc->shost_recovery || ioc->pci_error_recovery) goto rearm_timer; spin_unlock_irqrestore(&ioc->ioc_reset_in_progress_lock, flags); @@ -164,6 +164,20 @@ _base_fault_reset_work(struct work_struct *work) printk(MPT2SAS_INFO_FMT "%s : SAS host is non-operational !!!!\n", ioc->name, __func__); + /* It may be possible that EEH recovery can resolve some of + * pci bus failure issues rather removing the dead ioc function + * by considering controller is in a non-operational state. So + * here priority is given to the EEH recovery. If it doesn't + * not resolve this issue, mpt2sas driver will consider this + * controller to non-operational state and remove the dead ioc + * function. + */ + if (ioc->non_operational_loop++ < 5) { + spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, + flags); + goto rearm_timer; + } + /* * Call _scsih_flush_pending_cmds callback so that we flush all * pending commands back to OS. This call is required to aovid @@ -193,6 +207,8 @@ _base_fault_reset_work(struct work_struct *work) return; /* don't rearm timer */ } + ioc->non_operational_loop = 0; + if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { rc = mpt2sas_base_hard_reset_handler(ioc, CAN_SLEEP, FORCE_BIG_HAMMER); @@ -4386,6 +4402,7 @@ mpt2sas_base_attach(struct MPT2SAS_ADAPTER *ioc) if (missing_delay[0] != -1 && missing_delay[1] != -1) _base_update_missing_delay(ioc, missing_delay[0], missing_delay[1]); + ioc->non_operational_loop = 0; return 0; diff --git a/drivers/scsi/mpt2sas/mpt2sas_base.h b/drivers/scsi/mpt2sas/mpt2sas_base.h index 543d8d63747..c6ee7aad750 100644 --- a/drivers/scsi/mpt2sas/mpt2sas_base.h +++ b/drivers/scsi/mpt2sas/mpt2sas_base.h @@ -835,6 +835,7 @@ struct MPT2SAS_ADAPTER { u16 cpu_msix_table_sz; u32 ioc_reset_count; MPT2SAS_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds; + u32 non_operational_loop; /* internal commands, callback index */ u8 scsi_io_cb_idx; diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 04f8010f077..18360032a52 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> @@ -1310,7 +1309,6 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, void *sg_local, *chain; u32 chain_offset; u32 chain_length; - u32 chain_flags; int sges_left; u32 sges_in_segment; u8 simple_sgl_flags; @@ -1356,8 +1354,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, sges_in_segment--; } - /* initializing the chain flags and pointers */ - chain_flags = MPI2_SGE_FLAGS_CHAIN_ELEMENT << MPI2_SGE_FLAGS_SHIFT; + /* initializing the pointers */ chain_req = _base_get_chain_buffer_tracker(ioc, smid); if (!chain_req) return -1; diff --git a/drivers/scsi/mpt3sas/mpt3sas_config.c b/drivers/scsi/mpt3sas/mpt3sas_config.c index ce7e59b2fc0..1df9ed4f371 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_config.c +++ b/drivers/scsi/mpt3sas/mpt3sas_config.c @@ -41,7 +41,6 @@ * USA. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 8af944d7d13..054d5231c97 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> @@ -3136,7 +3135,7 @@ _ctl_diag_trigger_mpi_store(struct device *cdev, spin_lock_irqsave(&ioc->diag_trigger_lock, flags); sz = min(sizeof(struct SL_WH_MPI_TRIGGERS_T), count); memset(&ioc->diag_trigger_mpi, 0, - sizeof(struct SL_WH_EVENT_TRIGGERS_T)); + sizeof(ioc->diag_trigger_mpi)); memcpy(&ioc->diag_trigger_mpi, buf, sz); if (ioc->diag_trigger_mpi.ValidEntries > NUM_VALID_ENTRIES) ioc->diag_trigger_mpi.ValidEntries = NUM_VALID_ENTRIES; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 6421a06c4ce..dcbf7c880cb 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -41,7 +41,6 @@ * USA. */ -#include <linux/version.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> @@ -2755,13 +2754,11 @@ _scsih_block_io_to_children_attached_directly(struct MPT3SAS_ADAPTER *ioc, int i; u16 handle; u16 reason_code; - u8 phy_number; for (i = 0; i < event_data->NumEntries; i++) { handle = le16_to_cpu(event_data->PHY[i].AttachedDevHandle); if (!handle) continue; - phy_number = event_data->StartPhyNum + i; reason_code = event_data->PHY[i].PhyStatus & MPI2_EVENT_SAS_TOPO_RC_MASK; if (reason_code == MPI2_EVENT_SAS_TOPO_RC_DELAY_NOT_RESPONDING) diff --git a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c index da6c5f25749..6f8d6213040 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c +++ b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c @@ -42,7 +42,6 @@ * USA. */ -#include <linux/version.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/errno.h> diff --git a/drivers/scsi/qla4xxx/ql4_83xx.c b/drivers/scsi/qla4xxx/ql4_83xx.c index 6e9af20be12..5d8fe4f7565 100644 --- a/drivers/scsi/qla4xxx/ql4_83xx.c +++ b/drivers/scsi/qla4xxx/ql4_83xx.c @@ -538,7 +538,7 @@ struct device_info { int port_num; }; -static int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha) +int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha) { uint32_t drv_active; uint32_t dev_part, dev_part1, dev_part2; @@ -1351,31 +1351,58 @@ exit_start_fw: /*----------------------Interrupt Related functions ---------------------*/ -void qla4_83xx_disable_intrs(struct scsi_qla_host *ha) +static void qla4_83xx_disable_iocb_intrs(struct scsi_qla_host *ha) +{ + if (test_and_clear_bit(AF_83XX_IOCB_INTR_ON, &ha->flags)) + qla4_8xxx_intr_disable(ha); +} + +static void qla4_83xx_disable_mbox_intrs(struct scsi_qla_host *ha) { uint32_t mb_int, ret; - if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags)) - qla4_8xxx_mbx_intr_disable(ha); + if (test_and_clear_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) { + ret = readl(&ha->qla4_83xx_reg->mbox_int); + mb_int = ret & ~INT_ENABLE_FW_MB; + writel(mb_int, &ha->qla4_83xx_reg->mbox_int); + writel(1, &ha->qla4_83xx_reg->leg_int_mask); + } +} - ret = readl(&ha->qla4_83xx_reg->mbox_int); - mb_int = ret & ~INT_ENABLE_FW_MB; - writel(mb_int, &ha->qla4_83xx_reg->mbox_int); - writel(1, &ha->qla4_83xx_reg->leg_int_mask); +void qla4_83xx_disable_intrs(struct scsi_qla_host *ha) +{ + qla4_83xx_disable_mbox_intrs(ha); + qla4_83xx_disable_iocb_intrs(ha); } -void qla4_83xx_enable_intrs(struct scsi_qla_host *ha) +static void qla4_83xx_enable_iocb_intrs(struct scsi_qla_host *ha) +{ + if (!test_bit(AF_83XX_IOCB_INTR_ON, &ha->flags)) { + qla4_8xxx_intr_enable(ha); + set_bit(AF_83XX_IOCB_INTR_ON, &ha->flags); + } +} + +void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha) { uint32_t mb_int; - qla4_8xxx_mbx_intr_enable(ha); - mb_int = INT_ENABLE_FW_MB; - writel(mb_int, &ha->qla4_83xx_reg->mbox_int); - writel(0, &ha->qla4_83xx_reg->leg_int_mask); + if (!test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) { + mb_int = INT_ENABLE_FW_MB; + writel(mb_int, &ha->qla4_83xx_reg->mbox_int); + writel(0, &ha->qla4_83xx_reg->leg_int_mask); + set_bit(AF_83XX_MBOX_INTR_ON, &ha->flags); + } +} - set_bit(AF_INTERRUPTS_ON, &ha->flags); + +void qla4_83xx_enable_intrs(struct scsi_qla_host *ha) +{ + qla4_83xx_enable_mbox_intrs(ha); + qla4_83xx_enable_iocb_intrs(ha); } + void qla4_83xx_queue_mbox_cmd(struct scsi_qla_host *ha, uint32_t *mbx_cmd, int incount) { diff --git a/drivers/scsi/qla4xxx/ql4_attr.c b/drivers/scsi/qla4xxx/ql4_attr.c index 76819b71ada..19ee55a6226 100644 --- a/drivers/scsi/qla4xxx/ql4_attr.c +++ b/drivers/scsi/qla4xxx/ql4_attr.c @@ -74,16 +74,22 @@ qla4_8xxx_sysfs_write_fw_dump(struct file *filep, struct kobject *kobj, } break; case 2: - /* Reset HBA */ + /* Reset HBA and collect FW dump */ ha->isp_ops->idc_lock(ha); dev_state = qla4_8xxx_rd_direct(ha, QLA8XXX_CRB_DEV_STATE); if (dev_state == QLA8XXX_DEV_READY) { - ql4_printk(KERN_INFO, ha, - "%s: Setting Need reset, reset_owner is 0x%x.\n", - __func__, ha->func_num); + ql4_printk(KERN_INFO, ha, "%s: Setting Need reset\n", + __func__); qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DEV_STATE, QLA8XXX_DEV_NEED_RESET); - set_bit(AF_8XXX_RST_OWNER, &ha->flags); + if (is_qla8022(ha) || + (is_qla8032(ha) && + qla4_83xx_can_perform_reset(ha))) { + set_bit(AF_8XXX_RST_OWNER, &ha->flags); + set_bit(AF_FW_RECOVERY, &ha->flags); + ql4_printk(KERN_INFO, ha, "%s: Reset owner is 0x%x\n", + __func__, ha->func_num); + } } else ql4_printk(KERN_INFO, ha, "%s: Reset not performed as device state is 0x%x\n", diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h index 329d553eae9..129f5dd0282 100644 --- a/drivers/scsi/qla4xxx/ql4_def.h +++ b/drivers/scsi/qla4xxx/ql4_def.h @@ -136,6 +136,7 @@ #define RESPONSE_QUEUE_DEPTH 64 #define QUEUE_SIZE 64 #define DMA_BUFFER_SIZE 512 +#define IOCB_HIWAT_CUSHION 4 /* * Misc @@ -180,6 +181,7 @@ #define DISABLE_ACB_TOV 30 #define IP_CONFIG_TOV 30 #define LOGIN_TOV 12 +#define BOOT_LOGIN_RESP_TOV 60 #define MAX_RESET_HA_RETRIES 2 #define FW_ALIVE_WAIT_TOV 3 @@ -314,6 +316,7 @@ struct ql4_tuple_ddb { * DDB flags. */ #define DF_RELOGIN 0 /* Relogin to device */ +#define DF_BOOT_TGT 1 /* Boot target entry */ #define DF_ISNS_DISCOVERED 2 /* Device was discovered via iSNS */ #define DF_FO_MASKED 3 @@ -501,6 +504,7 @@ struct scsi_qla_host { #define AF_INTERRUPTS_ON 6 /* 0x00000040 */ #define AF_GET_CRASH_RECORD 7 /* 0x00000080 */ #define AF_LINK_UP 8 /* 0x00000100 */ +#define AF_LOOPBACK 9 /* 0x00000200 */ #define AF_IRQ_ATTACHED 10 /* 0x00000400 */ #define AF_DISABLE_ACB_COMPLETE 11 /* 0x00000800 */ #define AF_HA_REMOVAL 12 /* 0x00001000 */ @@ -516,6 +520,8 @@ struct scsi_qla_host { #define AF_8XXX_RST_OWNER 25 /* 0x02000000 */ #define AF_82XX_DUMP_READING 26 /* 0x04000000 */ #define AF_83XX_NO_FW_DUMP 27 /* 0x08000000 */ +#define AF_83XX_IOCB_INTR_ON 28 /* 0x10000000 */ +#define AF_83XX_MBOX_INTR_ON 29 /* 0x20000000 */ unsigned long dpc_flags; @@ -537,6 +543,7 @@ struct scsi_qla_host { uint32_t tot_ddbs; uint16_t iocb_cnt; + uint16_t iocb_hiwat; /* SRB cache. */ #define SRB_MIN_REQ 128 @@ -838,7 +845,8 @@ static inline int is_aer_supported(struct scsi_qla_host *ha) static inline int adapter_up(struct scsi_qla_host *ha) { return (test_bit(AF_ONLINE, &ha->flags) != 0) && - (test_bit(AF_LINK_UP, &ha->flags) != 0); + (test_bit(AF_LINK_UP, &ha->flags) != 0) && + (!test_bit(AF_LOOPBACK, &ha->flags)); } static inline struct scsi_qla_host* to_qla_host(struct Scsi_Host *shost) diff --git a/drivers/scsi/qla4xxx/ql4_fw.h b/drivers/scsi/qla4xxx/ql4_fw.h index 1c479502035..ad9d2e2d370 100644 --- a/drivers/scsi/qla4xxx/ql4_fw.h +++ b/drivers/scsi/qla4xxx/ql4_fw.h @@ -495,7 +495,7 @@ struct qla_flt_region { #define MBOX_ASTS_IPV6_LCL_PREFIX_IGNORED 0x802D #define MBOX_ASTS_ICMPV6_ERROR_MSG_RCVD 0x802E #define MBOX_ASTS_IDC_COMPLETE 0x8100 -#define MBOX_ASTS_IDC_NOTIFY 0x8101 +#define MBOX_ASTS_IDC_REQUEST_NOTIFICATION 0x8101 #define MBOX_ASTS_TXSCVR_INSERTED 0x8130 #define MBOX_ASTS_TXSCVR_REMOVED 0x8131 @@ -522,6 +522,10 @@ struct qla_flt_region { #define FLASH_OPT_COMMIT 2 #define FLASH_OPT_RMW_COMMIT 3 +/* Loopback type */ +#define ENABLE_INTERNAL_LOOPBACK 0x04 +#define ENABLE_EXTERNAL_LOOPBACK 0x08 + /*************************************************************************/ /* Host Adapter Initialization Control Block (from host) */ diff --git a/drivers/scsi/qla4xxx/ql4_glbl.h b/drivers/scsi/qla4xxx/ql4_glbl.h index 57a5a3cf577..982293edf02 100644 --- a/drivers/scsi/qla4xxx/ql4_glbl.h +++ b/drivers/scsi/qla4xxx/ql4_glbl.h @@ -253,12 +253,14 @@ void qla4_8xxx_set_rst_ready(struct scsi_qla_host *ha); void qla4_8xxx_clear_rst_ready(struct scsi_qla_host *ha); int qla4_8xxx_device_bootstrap(struct scsi_qla_host *ha); void qla4_8xxx_get_minidump(struct scsi_qla_host *ha); -int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha); -int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha); +int qla4_8xxx_intr_disable(struct scsi_qla_host *ha); +int qla4_8xxx_intr_enable(struct scsi_qla_host *ha); int qla4_8xxx_set_param(struct scsi_qla_host *ha, int param); int qla4_8xxx_update_idc_reg(struct scsi_qla_host *ha); int qla4_83xx_post_idc_ack(struct scsi_qla_host *ha); void qla4_83xx_disable_pause(struct scsi_qla_host *ha); +void qla4_83xx_enable_mbox_intrs(struct scsi_qla_host *ha); +int qla4_83xx_can_perform_reset(struct scsi_qla_host *ha); extern int ql4xextended_error_logging; extern int ql4xdontresethba; diff --git a/drivers/scsi/qla4xxx/ql4_init.c b/drivers/scsi/qla4xxx/ql4_init.c index 1aca1b4f70b..8fc8548ba4b 100644 --- a/drivers/scsi/qla4xxx/ql4_init.c +++ b/drivers/scsi/qla4xxx/ql4_init.c @@ -195,12 +195,10 @@ exit_get_sys_info_no_free: * @ha: pointer to host adapter structure. * **/ -static int qla4xxx_init_local_data(struct scsi_qla_host *ha) +static void qla4xxx_init_local_data(struct scsi_qla_host *ha) { /* Initialize aen queue */ ha->aen_q_count = MAX_AEN_ENTRIES; - - return qla4xxx_get_firmware_status(ha); } static uint8_t @@ -935,14 +933,23 @@ int qla4xxx_initialize_adapter(struct scsi_qla_host *ha, int is_reset) if (ha->isp_ops->start_firmware(ha) == QLA_ERROR) goto exit_init_hba; + /* + * For ISP83XX, mailbox and IOCB interrupts are enabled separately. + * Mailbox interrupts must be enabled prior to issuing any mailbox + * command in order to prevent the possibility of losing interrupts + * while switching from polling to interrupt mode. IOCB interrupts are + * enabled via isp_ops->enable_intrs. + */ + if (is_qla8032(ha)) + qla4_83xx_enable_mbox_intrs(ha); + if (qla4xxx_about_firmware(ha) == QLA_ERROR) goto exit_init_hba; if (ha->isp_ops->get_sys_info(ha) == QLA_ERROR) goto exit_init_hba; - if (qla4xxx_init_local_data(ha) == QLA_ERROR) - goto exit_init_hba; + qla4xxx_init_local_data(ha); status = qla4xxx_init_firmware(ha); if (status == QLA_ERROR) diff --git a/drivers/scsi/qla4xxx/ql4_iocb.c b/drivers/scsi/qla4xxx/ql4_iocb.c index f48f37a281d..14fec976f63 100644 --- a/drivers/scsi/qla4xxx/ql4_iocb.c +++ b/drivers/scsi/qla4xxx/ql4_iocb.c @@ -316,7 +316,7 @@ int qla4xxx_send_command_to_isp(struct scsi_qla_host *ha, struct srb * srb) goto queuing_error; /* total iocbs active */ - if ((ha->iocb_cnt + req_cnt) >= REQUEST_QUEUE_DEPTH) + if ((ha->iocb_cnt + req_cnt) >= ha->iocb_hiwat) goto queuing_error; /* Build command packet */ diff --git a/drivers/scsi/qla4xxx/ql4_isr.c b/drivers/scsi/qla4xxx/ql4_isr.c index 15ea81465ce..1b83dc283d2 100644 --- a/drivers/scsi/qla4xxx/ql4_isr.c +++ b/drivers/scsi/qla4xxx/ql4_isr.c @@ -582,6 +582,33 @@ exit_prq_error: } /** + * qla4_83xx_loopback_in_progress: Is loopback in progress? + * @ha: Pointer to host adapter structure. + * @ret: 1 = loopback in progress, 0 = loopback not in progress + **/ +static int qla4_83xx_loopback_in_progress(struct scsi_qla_host *ha) +{ + int rval = 1; + + if (is_qla8032(ha)) { + if ((ha->idc_info.info2 & ENABLE_INTERNAL_LOOPBACK) || + (ha->idc_info.info2 & ENABLE_EXTERNAL_LOOPBACK)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Loopback diagnostics in progress\n", + __func__)); + rval = 1; + } else { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Loopback diagnostics not in progress\n", + __func__)); + rval = 0; + } + } + + return rval; +} + +/** * qla4xxx_isr_decode_mailbox - decodes mailbox status * @ha: Pointer to host adapter structure. * @mailbox_status: Mailbox status. @@ -676,8 +703,10 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, case MBOX_ASTS_LINK_DOWN: clear_bit(AF_LINK_UP, &ha->flags); - if (test_bit(AF_INIT_DONE, &ha->flags)) + if (test_bit(AF_INIT_DONE, &ha->flags)) { set_bit(DPC_LINK_CHANGED, &ha->dpc_flags); + qla4xxx_wake_dpc(ha); + } ql4_printk(KERN_INFO, ha, "%s: LINK DOWN\n", __func__); qla4xxx_post_aen_work(ha, ISCSI_EVENT_LINKDOWN, @@ -806,7 +835,7 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, " removed\n", ha->host_no, mbox_sts[0])); break; - case MBOX_ASTS_IDC_NOTIFY: + case MBOX_ASTS_IDC_REQUEST_NOTIFICATION: { uint32_t opcode; if (is_qla8032(ha)) { @@ -840,6 +869,11 @@ static void qla4xxx_isr_decode_mailbox(struct scsi_qla_host * ha, DEBUG2(ql4_printk(KERN_INFO, ha, "scsi:%ld: AEN %04x IDC Complete notification\n", ha->host_no, mbox_sts[0])); + + if (qla4_83xx_loopback_in_progress(ha)) + set_bit(AF_LOOPBACK, &ha->flags); + else + clear_bit(AF_LOOPBACK, &ha->flags); } break; @@ -1124,17 +1158,18 @@ irqreturn_t qla4_83xx_intr_handler(int irq, void *dev_id) /* Legacy interrupt is valid if bit31 of leg_int_ptr is set */ if (!(leg_int_ptr & LEG_INT_PTR_B31)) { - ql4_printk(KERN_ERR, ha, - "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n", - __func__); + DEBUG2(ql4_printk(KERN_ERR, ha, + "%s: Legacy Interrupt Bit 31 not set, spurious interrupt!\n", + __func__)); return IRQ_NONE; } /* Validate the PCIE function ID set in leg_int_ptr bits [19..16] */ if ((leg_int_ptr & PF_BITS_MASK) != ha->pf_bit) { - ql4_printk(KERN_ERR, ha, - "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n", - __func__, (leg_int_ptr & PF_BITS_MASK), ha->pf_bit); + DEBUG2(ql4_printk(KERN_ERR, ha, + "%s: Incorrect function ID 0x%x in legacy interrupt register, ha->pf_bit = 0x%x\n", + __func__, (leg_int_ptr & PF_BITS_MASK), + ha->pf_bit)); return IRQ_NONE; } @@ -1437,11 +1472,14 @@ irq_not_attached: void qla4xxx_free_irqs(struct scsi_qla_host *ha) { - if (test_bit(AF_MSIX_ENABLED, &ha->flags)) - qla4_8xxx_disable_msix(ha); - else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) { - free_irq(ha->pdev->irq, ha); - pci_disable_msi(ha->pdev); - } else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags)) - free_irq(ha->pdev->irq, ha); + if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) { + if (test_bit(AF_MSIX_ENABLED, &ha->flags)) { + qla4_8xxx_disable_msix(ha); + } else if (test_and_clear_bit(AF_MSI_ENABLED, &ha->flags)) { + free_irq(ha->pdev->irq, ha); + pci_disable_msi(ha->pdev); + } else if (test_and_clear_bit(AF_INTx_ENABLED, &ha->flags)) { + free_irq(ha->pdev->irq, ha); + } + } } diff --git a/drivers/scsi/qla4xxx/ql4_mbx.c b/drivers/scsi/qla4xxx/ql4_mbx.c index 3d41034191f..81e738d61ec 100644 --- a/drivers/scsi/qla4xxx/ql4_mbx.c +++ b/drivers/scsi/qla4xxx/ql4_mbx.c @@ -44,6 +44,30 @@ void qla4xxx_process_mbox_intr(struct scsi_qla_host *ha, int out_count) } /** + * qla4xxx_is_intr_poll_mode – Are we allowed to poll for interrupts? + * @ha: Pointer to host adapter structure. + * @ret: 1=polling mode, 0=non-polling mode + **/ +static int qla4xxx_is_intr_poll_mode(struct scsi_qla_host *ha) +{ + int rval = 1; + + if (is_qla8032(ha)) { + if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && + test_bit(AF_83XX_MBOX_INTR_ON, &ha->flags)) + rval = 0; + } else { + if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && + test_bit(AF_INTERRUPTS_ON, &ha->flags) && + test_bit(AF_ONLINE, &ha->flags) && + !test_bit(AF_HA_REMOVAL, &ha->flags)) + rval = 0; + } + + return rval; +} + +/** * qla4xxx_mailbox_command - issues mailbox commands * @ha: Pointer to host adapter structure. * @inCount: number of mailbox registers to load. @@ -153,33 +177,28 @@ int qla4xxx_mailbox_command(struct scsi_qla_host *ha, uint8_t inCount, /* * Wait for completion: Poll or completion queue */ - if (test_bit(AF_IRQ_ATTACHED, &ha->flags) && - test_bit(AF_INTERRUPTS_ON, &ha->flags) && - test_bit(AF_ONLINE, &ha->flags) && - !test_bit(AF_HA_REMOVAL, &ha->flags)) { - /* Do not poll for completion. Use completion queue */ - set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); - wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ); - clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); - } else { + if (qla4xxx_is_intr_poll_mode(ha)) { /* Poll for command to complete */ wait_count = jiffies + MBOX_TOV * HZ; while (test_bit(AF_MBOX_COMMAND_DONE, &ha->flags) == 0) { if (time_after_eq(jiffies, wait_count)) break; - /* * Service the interrupt. * The ISR will save the mailbox status registers * to a temporary storage location in the adapter * structure. */ - spin_lock_irqsave(&ha->hardware_lock, flags); ha->isp_ops->process_mailbox_interrupt(ha, outCount); spin_unlock_irqrestore(&ha->hardware_lock, flags); msleep(10); } + } else { + /* Do not poll for completion. Use completion queue */ + set_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); + wait_for_completion_timeout(&ha->mbx_intr_comp, MBOX_TOV * HZ); + clear_bit(AF_MBOX_COMMAND_NOPOLL, &ha->flags); } /* Check for mailbox timeout. */ @@ -678,8 +697,24 @@ int qla4xxx_get_firmware_status(struct scsi_qla_host * ha) return QLA_ERROR; } - ql4_printk(KERN_INFO, ha, "%ld firmware IOCBs available (%d).\n", - ha->host_no, mbox_sts[2]); + /* High-water mark of IOCBs */ + ha->iocb_hiwat = mbox_sts[2]; + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: firmware IOCBs available = %d\n", __func__, + ha->iocb_hiwat)); + + if (ha->iocb_hiwat > IOCB_HIWAT_CUSHION) + ha->iocb_hiwat -= IOCB_HIWAT_CUSHION; + + /* Ideally, we should not enter this code, as the # of firmware + * IOCBs is hard-coded in the firmware. We set a default + * iocb_hiwat here just in case */ + if (ha->iocb_hiwat == 0) { + ha->iocb_hiwat = REQUEST_QUEUE_DEPTH / 4; + DEBUG2(ql4_printk(KERN_WARNING, ha, + "%s: Setting IOCB's to = %d\n", __func__, + ha->iocb_hiwat)); + } return QLA_SUCCESS; } diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c index 499a92db1cf..71d3d234f52 100644 --- a/drivers/scsi/qla4xxx/ql4_nx.c +++ b/drivers/scsi/qla4xxx/ql4_nx.c @@ -2986,7 +2986,7 @@ int qla4_8xxx_load_risc(struct scsi_qla_host *ha) retval = qla4_8xxx_device_state_handler(ha); - if (retval == QLA_SUCCESS && !test_bit(AF_INIT_DONE, &ha->flags)) + if (retval == QLA_SUCCESS && !test_bit(AF_IRQ_ATTACHED, &ha->flags)) retval = qla4xxx_request_irqs(ha); return retval; @@ -3427,11 +3427,11 @@ int qla4_8xxx_get_sys_info(struct scsi_qla_host *ha) } /* Make sure we receive the minimum required data to cache internally */ - if (mbox_sts[4] < offsetof(struct mbx_sys_info, reserved)) { + if ((is_qla8032(ha) ? mbox_sts[3] : mbox_sts[4]) < + offsetof(struct mbx_sys_info, reserved)) { DEBUG2(printk("scsi%ld: %s: GET_SYS_INFO data receive" " error (%x)\n", ha->host_no, __func__, mbox_sts[4])); goto exit_validate_mac82; - } /* Save M.A.C. address & serial_number */ @@ -3463,7 +3463,7 @@ exit_validate_mac82: /* Interrupt handling helpers. */ -int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha) +int qla4_8xxx_intr_enable(struct scsi_qla_host *ha) { uint32_t mbox_cmd[MBOX_REG_COUNT]; uint32_t mbox_sts[MBOX_REG_COUNT]; @@ -3484,7 +3484,7 @@ int qla4_8xxx_mbx_intr_enable(struct scsi_qla_host *ha) return QLA_SUCCESS; } -int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha) +int qla4_8xxx_intr_disable(struct scsi_qla_host *ha) { uint32_t mbox_cmd[MBOX_REG_COUNT]; uint32_t mbox_sts[MBOX_REG_COUNT]; @@ -3509,7 +3509,7 @@ int qla4_8xxx_mbx_intr_disable(struct scsi_qla_host *ha) void qla4_82xx_enable_intrs(struct scsi_qla_host *ha) { - qla4_8xxx_mbx_intr_enable(ha); + qla4_8xxx_intr_enable(ha); spin_lock_irq(&ha->hardware_lock); /* BIT 10 - reset */ @@ -3522,7 +3522,7 @@ void qla4_82xx_disable_intrs(struct scsi_qla_host *ha) { if (test_and_clear_bit(AF_INTERRUPTS_ON, &ha->flags)) - qla4_8xxx_mbx_intr_disable(ha); + qla4_8xxx_intr_disable(ha); spin_lock_irq(&ha->hardware_lock); /* BIT 10 - set */ diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 4cec123a6a6..6142729167f 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -1337,18 +1337,18 @@ static int qla4xxx_session_get_param(struct iscsi_cls_session *cls_sess, sess->password_in, BIDI_CHAP, &idx); if (rval) - return -EINVAL; - - len = sprintf(buf, "%hu\n", idx); + len = sprintf(buf, "\n"); + else + len = sprintf(buf, "%hu\n", idx); break; case ISCSI_PARAM_CHAP_OUT_IDX: rval = qla4xxx_get_chap_index(ha, sess->username, sess->password, LOCAL_CHAP, &idx); if (rval) - return -EINVAL; - - len = sprintf(buf, "%hu\n", idx); + len = sprintf(buf, "\n"); + else + len = sprintf(buf, "%hu\n", idx); break; default: return iscsi_session_get_param(cls_sess, param, buf); @@ -2242,6 +2242,7 @@ static int qla4xxx_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) test_bit(DPC_HA_NEED_QUIESCENT, &ha->dpc_flags) || !test_bit(AF_ONLINE, &ha->flags) || !test_bit(AF_LINK_UP, &ha->flags) || + test_bit(AF_LOOPBACK, &ha->flags) || test_bit(DPC_RESET_HA_FW_CONTEXT, &ha->dpc_flags)) goto qc_host_busy; @@ -2978,6 +2979,7 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha) if (status == QLA_SUCCESS) { if (!test_bit(AF_FW_RECOVERY, &ha->flags)) qla4xxx_cmd_wait(ha); + ha->isp_ops->disable_intrs(ha); qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); qla4xxx_abort_active_cmds(ha, DID_RESET << 16); @@ -3479,7 +3481,8 @@ dpc_post_reset_ha: } /* ---- link change? --- */ - if (test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) { + if (!test_bit(AF_LOOPBACK, &ha->flags) && + test_and_clear_bit(DPC_LINK_CHANGED, &ha->dpc_flags)) { if (!test_bit(AF_LINK_UP, &ha->flags)) { /* ---- link down? --- */ qla4xxx_mark_all_devices_missing(ha); @@ -3508,10 +3511,8 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha) { qla4xxx_abort_active_cmds(ha, DID_NO_CONNECT << 16); - if (test_bit(AF_INTERRUPTS_ON, &ha->flags)) { - /* Turn-off interrupts on the card. */ - ha->isp_ops->disable_intrs(ha); - } + /* Turn-off interrupts on the card. */ + ha->isp_ops->disable_intrs(ha); if (is_qla40XX(ha)) { writel(set_rmask(CSR_SCSI_PROCESSOR_INTR), @@ -3547,8 +3548,7 @@ static void qla4xxx_free_adapter(struct scsi_qla_host *ha) } /* Detach interrupts */ - if (test_and_clear_bit(AF_IRQ_ATTACHED, &ha->flags)) - qla4xxx_free_irqs(ha); + qla4xxx_free_irqs(ha); /* free extra memory */ qla4xxx_mem_free(ha); @@ -4687,7 +4687,8 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha, struct iscsi_endpoint *ep; struct sockaddr_in *addr; struct sockaddr_in6 *addr6; - struct sockaddr *dst_addr; + struct sockaddr *t_addr; + struct sockaddr_storage *dst_addr; char *ip; /* TODO: need to destroy on unload iscsi_endpoint*/ @@ -4696,21 +4697,23 @@ static struct iscsi_endpoint *qla4xxx_get_ep_fwdb(struct scsi_qla_host *ha, return NULL; if (fw_ddb_entry->options & DDB_OPT_IPV6_DEVICE) { - dst_addr->sa_family = AF_INET6; + t_addr = (struct sockaddr *)dst_addr; + t_addr->sa_family = AF_INET6; addr6 = (struct sockaddr_in6 *)dst_addr; ip = (char *)&addr6->sin6_addr; memcpy(ip, fw_ddb_entry->ip_addr, IPv6_ADDR_LEN); addr6->sin6_port = htons(le16_to_cpu(fw_ddb_entry->port)); } else { - dst_addr->sa_family = AF_INET; + t_addr = (struct sockaddr *)dst_addr; + t_addr->sa_family = AF_INET; addr = (struct sockaddr_in *)dst_addr; ip = (char *)&addr->sin_addr; memcpy(ip, fw_ddb_entry->ip_addr, IP_ADDR_LEN); addr->sin_port = htons(le16_to_cpu(fw_ddb_entry->port)); } - ep = qla4xxx_ep_connect(ha->host, dst_addr, 0); + ep = qla4xxx_ep_connect(ha->host, (struct sockaddr *)dst_addr, 0); vfree(dst_addr); return ep; } @@ -4725,7 +4728,8 @@ static int qla4xxx_verify_boot_idx(struct scsi_qla_host *ha, uint16_t idx) } static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha, - struct ddb_entry *ddb_entry) + struct ddb_entry *ddb_entry, + uint16_t idx) { uint16_t def_timeout; @@ -4745,6 +4749,10 @@ static void qla4xxx_setup_flash_ddb_entry(struct scsi_qla_host *ha, def_timeout : LOGIN_TOV; ddb_entry->default_time2wait = le16_to_cpu(ddb_entry->fw_ddb_entry.iscsi_def_time2wait); + + if (ql4xdisablesysfsboot && + (idx == ha->pri_ddb_idx || idx == ha->sec_ddb_idx)) + set_bit(DF_BOOT_TGT, &ddb_entry->flags); } static void qla4xxx_wait_for_ip_configuration(struct scsi_qla_host *ha) @@ -4881,7 +4889,7 @@ static void qla4xxx_remove_failed_ddb(struct scsi_qla_host *ha, static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha, struct dev_db_entry *fw_ddb_entry, - int is_reset) + int is_reset, uint16_t idx) { struct iscsi_cls_session *cls_sess; struct iscsi_session *sess; @@ -4919,7 +4927,7 @@ static int qla4xxx_sess_conn_setup(struct scsi_qla_host *ha, memcpy(&ddb_entry->fw_ddb_entry, fw_ddb_entry, sizeof(struct dev_db_entry)); - qla4xxx_setup_flash_ddb_entry(ha, ddb_entry); + qla4xxx_setup_flash_ddb_entry(ha, ddb_entry, idx); cls_conn = iscsi_conn_setup(cls_sess, sizeof(struct qla_conn), conn_id); @@ -5036,7 +5044,7 @@ static void qla4xxx_build_nt_list(struct scsi_qla_host *ha, goto continue_next_nt; } - ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset); + ret = qla4xxx_sess_conn_setup(ha, fw_ddb_entry, is_reset, idx); if (ret == QLA_ERROR) goto exit_nt_list; @@ -5116,6 +5124,78 @@ void qla4xxx_build_ddb_list(struct scsi_qla_host *ha, int is_reset) } /** + * qla4xxx_wait_login_resp_boot_tgt - Wait for iSCSI boot target login + * response. + * @ha: pointer to adapter structure + * + * When the boot entry is normal iSCSI target then DF_BOOT_TGT flag will be + * set in DDB and we will wait for login response of boot targets during + * probe. + **/ +static void qla4xxx_wait_login_resp_boot_tgt(struct scsi_qla_host *ha) +{ + struct ddb_entry *ddb_entry; + struct dev_db_entry *fw_ddb_entry = NULL; + dma_addr_t fw_ddb_entry_dma; + unsigned long wtime; + uint32_t ddb_state; + int max_ddbs, idx, ret; + + max_ddbs = is_qla40XX(ha) ? MAX_DEV_DB_ENTRIES_40XX : + MAX_DEV_DB_ENTRIES; + + fw_ddb_entry = dma_alloc_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), + &fw_ddb_entry_dma, GFP_KERNEL); + if (!fw_ddb_entry) { + ql4_printk(KERN_ERR, ha, + "%s: Unable to allocate dma buffer\n", __func__); + goto exit_login_resp; + } + + wtime = jiffies + (HZ * BOOT_LOGIN_RESP_TOV); + + for (idx = 0; idx < max_ddbs; idx++) { + ddb_entry = qla4xxx_lookup_ddb_by_fw_index(ha, idx); + if (ddb_entry == NULL) + continue; + + if (test_bit(DF_BOOT_TGT, &ddb_entry->flags)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: DDB index [%d]\n", __func__, + ddb_entry->fw_ddb_index)); + do { + ret = qla4xxx_get_fwddb_entry(ha, + ddb_entry->fw_ddb_index, + fw_ddb_entry, fw_ddb_entry_dma, + NULL, NULL, &ddb_state, NULL, + NULL, NULL); + if (ret == QLA_ERROR) + goto exit_login_resp; + + if ((ddb_state == DDB_DS_SESSION_ACTIVE) || + (ddb_state == DDB_DS_SESSION_FAILED)) + break; + + schedule_timeout_uninterruptible(HZ); + + } while ((time_after(wtime, jiffies))); + + if (!time_after(wtime, jiffies)) { + DEBUG2(ql4_printk(KERN_INFO, ha, + "%s: Login response wait timer expired\n", + __func__)); + goto exit_login_resp; + } + } + } + +exit_login_resp: + if (fw_ddb_entry) + dma_free_coherent(&ha->pdev->dev, sizeof(*fw_ddb_entry), + fw_ddb_entry, fw_ddb_entry_dma); +} + +/** * qla4xxx_probe_adapter - callback function to probe HBA * @pdev: pointer to pci_dev structure * @pci_device_id: pointer to pci_device entry @@ -5270,7 +5350,7 @@ static int qla4xxx_probe_adapter(struct pci_dev *pdev, if (is_qla80XX(ha)) { ha->isp_ops->idc_lock(ha); dev_state = qla4_8xxx_rd_direct(ha, - QLA82XX_CRB_DEV_STATE); + QLA8XXX_CRB_DEV_STATE); ha->isp_ops->idc_unlock(ha); if (dev_state == QLA8XXX_DEV_FAILED) { ql4_printk(KERN_WARNING, ha, "%s: don't retry " @@ -5368,6 +5448,7 @@ skip_retry_init: /* Perform the build ddb list and login to each */ qla4xxx_build_ddb_list(ha, INIT_ADAPTER); iscsi_host_for_each_session(ha->host, qla4xxx_login_flash_ddb); + qla4xxx_wait_login_resp_boot_tgt(ha); qla4xxx_create_chap_list(ha); @@ -6008,14 +6089,6 @@ static int qla4xxx_host_reset(struct Scsi_Host *shost, int reset_type) goto exit_host_reset; } - rval = qla4xxx_wait_for_hba_online(ha); - if (rval != QLA_SUCCESS) { - DEBUG2(ql4_printk(KERN_INFO, ha, "%s: Unable to reset host " - "adapter\n", __func__)); - rval = -EIO; - goto exit_host_reset; - } - if (test_bit(DPC_RESET_HA, &ha->dpc_flags)) goto recover_adapter; @@ -6115,7 +6188,6 @@ qla4xxx_pci_mmio_enabled(struct pci_dev *pdev) static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) { uint32_t rval = QLA_ERROR; - uint32_t ret = 0; int fn; struct pci_dev *other_pdev = NULL; @@ -6201,16 +6273,7 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) qla4_8xxx_wr_direct(ha, QLA8XXX_CRB_DRV_STATE, 0); qla4_8xxx_set_drv_active(ha); ha->isp_ops->idc_unlock(ha); - ret = qla4xxx_request_irqs(ha); - if (ret) { - ql4_printk(KERN_WARNING, ha, "Failed to " - "reserve interrupt %d already in use.\n", - ha->pdev->irq); - rval = QLA_ERROR; - } else { - ha->isp_ops->enable_intrs(ha); - rval = QLA_SUCCESS; - } + ha->isp_ops->enable_intrs(ha); } } else { ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn 0x%x is not " @@ -6220,18 +6283,9 @@ static uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha) QLA8XXX_DEV_READY)) { clear_bit(AF_FW_RECOVERY, &ha->flags); rval = qla4xxx_initialize_adapter(ha, RESET_ADAPTER); - if (rval == QLA_SUCCESS) { - ret = qla4xxx_request_irqs(ha); - if (ret) { - ql4_printk(KERN_WARNING, ha, "Failed to" - " reserve interrupt %d already in" - " use.\n", ha->pdev->irq); - rval = QLA_ERROR; - } else { - ha->isp_ops->enable_intrs(ha); - rval = QLA_SUCCESS; - } - } + if (rval == QLA_SUCCESS) + ha->isp_ops->enable_intrs(ha); + ha->isp_ops->idc_lock(ha); qla4_8xxx_set_drv_active(ha); ha->isp_ops->idc_unlock(ha); diff --git a/drivers/scsi/qla4xxx/ql4_version.h b/drivers/scsi/qla4xxx/ql4_version.h index f6df2ea91ab..6775a45af31 100644 --- a/drivers/scsi/qla4xxx/ql4_version.h +++ b/drivers/scsi/qla4xxx/ql4_version.h @@ -5,4 +5,4 @@ * See LICENSE.qla4xxx for copyright and licensing details. */ -#define QLA4XXX_DRIVER_VERSION "5.03.00-k1" +#define QLA4XXX_DRIVER_VERSION "5.03.00-k4" diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 59d427bf08e..0a74b975efd 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2503,6 +2503,15 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr, } static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator, NULL); +static ssize_t +show_priv_session_target_id(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); + return sprintf(buf, "%d\n", session->target_id); +} +static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO, + show_priv_session_target_id, NULL); #define iscsi_priv_session_attr_show(field, format) \ static ssize_t \ @@ -2575,6 +2584,7 @@ static struct attribute *iscsi_session_attrs[] = { &dev_attr_priv_sess_creator.attr, &dev_attr_sess_chap_out_idx.attr, &dev_attr_sess_chap_in_idx.attr, + &dev_attr_priv_sess_target_id.attr, NULL, }; @@ -2638,6 +2648,8 @@ static umode_t iscsi_session_attr_is_visible(struct kobject *kobj, return S_IRUGO; else if (attr == &dev_attr_priv_sess_creator.attr) return S_IRUGO; + else if (attr == &dev_attr_priv_sess_target_id.attr) + return S_IRUGO; else { WARN_ONCE(1, "Invalid session attr"); return 0; diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index c2c77d1ac49..a764f165b58 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -29,14 +29,14 @@ choice config THERMAL_DEFAULT_GOV_STEP_WISE bool "step_wise" - select STEP_WISE + select THERMAL_GOV_STEP_WISE help Use the step_wise governor as default. This throttles the devices one step at a time. config THERMAL_DEFAULT_GOV_FAIR_SHARE bool "fair_share" - select FAIR_SHARE + select THERMAL_GOV_FAIR_SHARE help Use the fair_share governor as default. This throttles the devices based on their 'contribution' to a zone. The @@ -44,24 +44,24 @@ config THERMAL_DEFAULT_GOV_FAIR_SHARE config THERMAL_DEFAULT_GOV_USER_SPACE bool "user_space" - select USER_SPACE + select THERMAL_GOV_USER_SPACE help Select this if you want to let the user space manage the lpatform thermals. endchoice -config FAIR_SHARE +config THERMAL_GOV_FAIR_SHARE bool "Fair-share thermal governor" help Enable this to manage platform thermals using fair-share governor. -config STEP_WISE +config THERMAL_GOV_STEP_WISE bool "Step_wise thermal governor" help Enable this to manage platform thermals using a simple linear -config USER_SPACE +config THERMAL_GOV_USER_SPACE bool "User_space thermal governor" help Enable this to let the user space manage the platform thermals. @@ -78,6 +78,14 @@ config CPU_THERMAL and not the ACPI interface. If you want this support, you should say Y here. +config THERMAL_EMULATION + bool "Thermal emulation mode support" + help + Enable this option to make a emul_temp sysfs node in thermal zone + directory to support temperature emulation. With emulation sysfs node, + user can manually input temperature and test the different trip + threshold behaviour for simulation purpose. + config SPEAR_THERMAL bool "SPEAr thermal sensor driver" depends on PLAT_SPEAR @@ -93,6 +101,14 @@ config RCAR_THERMAL Enable this to plug the R-Car thermal sensor driver into the Linux thermal framework +config KIRKWOOD_THERMAL + tristate "Temperature sensor on Marvell Kirkwood SoCs" + depends on ARCH_KIRKWOOD + depends on OF + help + Support for the Kirkwood thermal sensor driver into the Linux thermal + framework. Only kirkwood 88F6282 and 88F6283 have this sensor. + config EXYNOS_THERMAL tristate "Temperature sensor on Samsung EXYNOS" depends on (ARCH_EXYNOS4 || ARCH_EXYNOS5) @@ -101,6 +117,23 @@ config EXYNOS_THERMAL If you say yes here you get support for TMU (Thermal Management Unit) on SAMSUNG EXYNOS series of SoC. +config EXYNOS_THERMAL_EMUL + bool "EXYNOS TMU emulation mode support" + depends on EXYNOS_THERMAL + help + Exynos 4412 and 4414 and 5 series has emulation mode on TMU. + Enable this option will be make sysfs node in exynos thermal platform + device directory to support emulation mode. With emulation mode sysfs + node, you can manually input temperature to TMU for simulation purpose. + +config DOVE_THERMAL + tristate "Temperature sensor on Marvell Dove SoCs" + depends on ARCH_DOVE + depends on OF + help + Support for the Dove thermal sensor driver in the Linux thermal + framework. + config DB8500_THERMAL bool "DB8500 thermal management" depends on ARCH_U8500 @@ -122,4 +155,14 @@ config DB8500_CPUFREQ_COOLING bound cpufreq cooling device turns active to set CPU frequency low to cool down the CPU. +config INTEL_POWERCLAMP + tristate "Intel PowerClamp idle injection driver" + depends on THERMAL + depends on X86 + depends on CPU_SUP_INTEL + help + Enable this to enable Intel PowerClamp idle injection driver. This + enforce idle time which results in more package C-state residency. The + user interface is exposed via generic thermal framework. + endif diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index d8da683245f..d3a2b38c31e 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -5,9 +5,9 @@ obj-$(CONFIG_THERMAL) += thermal_sys.o # governors -obj-$(CONFIG_FAIR_SHARE) += fair_share.o -obj-$(CONFIG_STEP_WISE) += step_wise.o -obj-$(CONFIG_USER_SPACE) += user_space.o +obj-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o +obj-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o +obj-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o # cpufreq cooling obj-$(CONFIG_CPU_THERMAL) += cpu_cooling.o @@ -15,6 +15,10 @@ obj-$(CONFIG_CPU_THERMAL) += cpu_cooling.o # platform thermal drivers obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o +obj-$(CONFIG_KIRKWOOD_THERMAL) += kirkwood_thermal.o obj-$(CONFIG_EXYNOS_THERMAL) += exynos_thermal.o +obj-$(CONFIG_DOVE_THERMAL) += dove_thermal.o obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o +obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o + diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index c33fa5315d6..8dc44cbb3e0 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -111,8 +111,8 @@ static int is_cpufreq_valid(int cpu) /** * get_cpu_frequency - get the absolute value of frequency from level. * @cpu: cpu for which frequency is fetched. - * @level: level of frequency of the CPU - * e.g level=1 --> 1st MAX FREQ, LEVEL=2 ---> 2nd MAX FREQ, .... etc + * @level: level of frequency, equals cooling state of cpu cooling device + * e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc */ static unsigned int get_cpu_frequency(unsigned int cpu, unsigned long level) { diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c index 4cf8e72af90..21419851fc0 100644 --- a/drivers/thermal/db8500_cpufreq_cooling.c +++ b/drivers/thermal/db8500_cpufreq_cooling.c @@ -21,6 +21,7 @@ #include <linux/cpufreq.h> #include <linux/err.h> #include <linux/module.h> +#include <linux/of.h> #include <linux/platform_device.h> #include <linux/slab.h> @@ -73,15 +74,13 @@ static const struct of_device_id db8500_cpufreq_cooling_match[] = { { .compatible = "stericsson,db8500-cpufreq-cooling" }, {}, }; -#else -#define db8500_cpufreq_cooling_match NULL #endif static struct platform_driver db8500_cpufreq_cooling_driver = { .driver = { .owner = THIS_MODULE, .name = "db8500-cpufreq-cooling", - .of_match_table = db8500_cpufreq_cooling_match, + .of_match_table = of_match_ptr(db8500_cpufreq_cooling_match), }, .probe = db8500_cpufreq_cooling_probe, .suspend = db8500_cpufreq_cooling_suspend, diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index ec71ade3e31..61ce60a3592 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -508,15 +508,13 @@ static const struct of_device_id db8500_thermal_match[] = { { .compatible = "stericsson,db8500-thermal" }, {}, }; -#else -#define db8500_thermal_match NULL #endif static struct platform_driver db8500_thermal_driver = { .driver = { .owner = THIS_MODULE, .name = "db8500-thermal", - .of_match_table = db8500_thermal_match, + .of_match_table = of_match_ptr(db8500_thermal_match), }, .probe = db8500_thermal_probe, .suspend = db8500_thermal_suspend, diff --git a/drivers/thermal/dove_thermal.c b/drivers/thermal/dove_thermal.c new file mode 100644 index 00000000000..7b0bfa0e7a9 --- /dev/null +++ b/drivers/thermal/dove_thermal.c @@ -0,0 +1,209 @@ +/* + * Dove thermal sensor driver + * + * Copyright (C) 2013 Andrew Lunn <andrew@lunn.ch> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/thermal.h> + +#define DOVE_THERMAL_TEMP_OFFSET 1 +#define DOVE_THERMAL_TEMP_MASK 0x1FF + +/* Dove Thermal Manager Control and Status Register */ +#define PMU_TM_DISABLE_OFFS 0 +#define PMU_TM_DISABLE_MASK (0x1 << PMU_TM_DISABLE_OFFS) + +/* Dove Theraml Diode Control 0 Register */ +#define PMU_TDC0_SW_RST_MASK (0x1 << 1) +#define PMU_TDC0_SEL_VCAL_OFFS 5 +#define PMU_TDC0_SEL_VCAL_MASK (0x3 << PMU_TDC0_SEL_VCAL_OFFS) +#define PMU_TDC0_REF_CAL_CNT_OFFS 11 +#define PMU_TDC0_REF_CAL_CNT_MASK (0x1FF << PMU_TDC0_REF_CAL_CNT_OFFS) +#define PMU_TDC0_AVG_NUM_OFFS 25 +#define PMU_TDC0_AVG_NUM_MASK (0x7 << PMU_TDC0_AVG_NUM_OFFS) + +/* Dove Thermal Diode Control 1 Register */ +#define PMU_TEMP_DIOD_CTRL1_REG 0x04 +#define PMU_TDC1_TEMP_VALID_MASK (0x1 << 10) + +/* Dove Thermal Sensor Dev Structure */ +struct dove_thermal_priv { + void __iomem *sensor; + void __iomem *control; +}; + +static int dove_init_sensor(const struct dove_thermal_priv *priv) +{ + u32 reg; + u32 i; + + /* Configure the Diode Control Register #0 */ + reg = readl_relaxed(priv->control); + + /* Use average of 2 */ + reg &= ~PMU_TDC0_AVG_NUM_MASK; + reg |= (0x1 << PMU_TDC0_AVG_NUM_OFFS); + + /* Reference calibration value */ + reg &= ~PMU_TDC0_REF_CAL_CNT_MASK; + reg |= (0x0F1 << PMU_TDC0_REF_CAL_CNT_OFFS); + + /* Set the high level reference for calibration */ + reg &= ~PMU_TDC0_SEL_VCAL_MASK; + reg |= (0x2 << PMU_TDC0_SEL_VCAL_OFFS); + writel(reg, priv->control); + + /* Reset the sensor */ + reg = readl_relaxed(priv->control); + writel((reg | PMU_TDC0_SW_RST_MASK), priv->control); + writel(reg, priv->control); + + /* Enable the sensor */ + reg = readl_relaxed(priv->sensor); + reg &= ~PMU_TM_DISABLE_MASK; + writel(reg, priv->sensor); + + /* Poll the sensor for the first reading */ + for (i = 0; i < 1000000; i++) { + reg = readl_relaxed(priv->sensor); + if (reg & DOVE_THERMAL_TEMP_MASK) + break; + } + + if (i == 1000000) + return -EIO; + + return 0; +} + +static int dove_get_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + unsigned long reg; + struct dove_thermal_priv *priv = thermal->devdata; + + /* Valid check */ + reg = readl_relaxed(priv->control + PMU_TEMP_DIOD_CTRL1_REG); + if ((reg & PMU_TDC1_TEMP_VALID_MASK) == 0x0) { + dev_err(&thermal->device, + "Temperature sensor reading not valid\n"); + return -EIO; + } + + /* + * Calculate temperature. See Section 8.10.1 of 88AP510, + * Documentation/arm/Marvell/README + */ + reg = readl_relaxed(priv->sensor); + reg = (reg >> DOVE_THERMAL_TEMP_OFFSET) & DOVE_THERMAL_TEMP_MASK; + *temp = ((2281638UL - (7298*reg)) / 10); + + return 0; +} + +static struct thermal_zone_device_ops ops = { + .get_temp = dove_get_temp, +}; + +static const struct of_device_id dove_thermal_id_table[] = { + { .compatible = "marvell,dove-thermal" }, + {} +}; + +static int dove_thermal_probe(struct platform_device *pdev) +{ + struct thermal_zone_device *thermal = NULL; + struct dove_thermal_priv *priv; + struct resource *res; + int ret; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Failed to get platform resource\n"); + return -ENODEV; + } + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->sensor = devm_request_and_ioremap(&pdev->dev, res); + if (!priv->sensor) { + dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) { + dev_err(&pdev->dev, "Failed to get platform resource\n"); + return -ENODEV; + } + priv->control = devm_request_and_ioremap(&pdev->dev, res); + if (!priv->control) { + dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + ret = dove_init_sensor(priv); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize sensor\n"); + return ret; + } + + thermal = thermal_zone_device_register("dove_thermal", 0, 0, + priv, &ops, NULL, 0, 0); + if (IS_ERR(thermal)) { + dev_err(&pdev->dev, + "Failed to register thermal zone device\n"); + return PTR_ERR(thermal); + } + + platform_set_drvdata(pdev, thermal); + + return 0; +} + +static int dove_thermal_exit(struct platform_device *pdev) +{ + struct thermal_zone_device *dove_thermal = + platform_get_drvdata(pdev); + + thermal_zone_device_unregister(dove_thermal); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +MODULE_DEVICE_TABLE(of, dove_thermal_id_table); + +static struct platform_driver dove_thermal_driver = { + .probe = dove_thermal_probe, + .remove = dove_thermal_exit, + .driver = { + .name = "dove_thermal", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(dove_thermal_id_table), + }, +}; + +module_platform_driver(dove_thermal_driver); + +MODULE_AUTHOR("Andrew Lunn <andrew@lunn.ch>"); +MODULE_DESCRIPTION("Dove thermal driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c index bada1308318..e04ebd8671a 100644 --- a/drivers/thermal/exynos_thermal.c +++ b/drivers/thermal/exynos_thermal.c @@ -82,7 +82,7 @@ #define EXYNOS_TRIMINFO_RELOAD 0x1 #define EXYNOS_TMU_CLEAR_RISE_INT 0x111 -#define EXYNOS_TMU_CLEAR_FALL_INT (0x111 << 16) +#define EXYNOS_TMU_CLEAR_FALL_INT (0x111 << 12) #define EXYNOS_MUX_ADDR_VALUE 6 #define EXYNOS_MUX_ADDR_SHIFT 20 #define EXYNOS_TMU_TRIP_MODE_SHIFT 13 @@ -94,11 +94,20 @@ #define SENSOR_NAME_LEN 16 #define MAX_TRIP_COUNT 8 #define MAX_COOLING_DEVICE 4 +#define MAX_THRESHOLD_LEVS 4 #define ACTIVE_INTERVAL 500 #define IDLE_INTERVAL 10000 #define MCELSIUS 1000 +#ifdef CONFIG_EXYNOS_THERMAL_EMUL +#define EXYNOS_EMUL_TIME 0x57F0 +#define EXYNOS_EMUL_TIME_SHIFT 16 +#define EXYNOS_EMUL_DATA_SHIFT 8 +#define EXYNOS_EMUL_DATA_MASK 0xFF +#define EXYNOS_EMUL_ENABLE 0x1 +#endif /* CONFIG_EXYNOS_THERMAL_EMUL */ + /* CPU Zone information */ #define PANIC_ZONE 4 #define WARN_ZONE 3 @@ -125,6 +134,7 @@ struct exynos_tmu_data { struct thermal_trip_point_conf { int trip_val[MAX_TRIP_COUNT]; int trip_count; + u8 trigger_falling; }; struct thermal_cooling_conf { @@ -174,7 +184,8 @@ static int exynos_set_mode(struct thermal_zone_device *thermal, mutex_lock(&th_zone->therm_dev->lock); - if (mode == THERMAL_DEVICE_ENABLED) + if (mode == THERMAL_DEVICE_ENABLED && + !th_zone->sensor_conf->trip_data.trigger_falling) th_zone->therm_dev->polling_delay = IDLE_INTERVAL; else th_zone->therm_dev->polling_delay = 0; @@ -284,7 +295,7 @@ static int exynos_bind(struct thermal_zone_device *thermal, case MONITOR_ZONE: case WARN_ZONE: if (thermal_zone_bind_cooling_device(thermal, i, cdev, - level, level)) { + level, 0)) { pr_err("error binding cdev inst %d\n", i); ret = -EINVAL; } @@ -362,10 +373,17 @@ static int exynos_get_temp(struct thermal_zone_device *thermal, static int exynos_get_trend(struct thermal_zone_device *thermal, int trip, enum thermal_trend *trend) { - if (thermal->temperature >= trip) - *trend = THERMAL_TREND_RAISING; + int ret; + unsigned long trip_temp; + + ret = exynos_get_trip_temp(thermal, trip, &trip_temp); + if (ret < 0) + return ret; + + if (thermal->temperature >= trip_temp) + *trend = THERMAL_TREND_RAISE_FULL; else - *trend = THERMAL_TREND_DROPPING; + *trend = THERMAL_TREND_DROP_FULL; return 0; } @@ -413,7 +431,8 @@ static void exynos_report_trigger(void) break; } - if (th_zone->mode == THERMAL_DEVICE_ENABLED) { + if (th_zone->mode == THERMAL_DEVICE_ENABLED && + !th_zone->sensor_conf->trip_data.trigger_falling) { if (i > 0) th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL; else @@ -452,7 +471,8 @@ static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf) th_zone->therm_dev = thermal_zone_device_register(sensor_conf->name, EXYNOS_ZONE_COUNT, 0, NULL, &exynos_dev_ops, NULL, 0, - IDLE_INTERVAL); + sensor_conf->trip_data.trigger_falling ? + 0 : IDLE_INTERVAL); if (IS_ERR(th_zone->therm_dev)) { pr_err("Failed to register thermal zone device\n"); @@ -559,8 +579,9 @@ static int exynos_tmu_initialize(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); struct exynos_tmu_platform_data *pdata = data->pdata; - unsigned int status, trim_info, rising_threshold; - int ret = 0, threshold_code; + unsigned int status, trim_info; + unsigned int rising_threshold = 0, falling_threshold = 0; + int ret = 0, threshold_code, i, trigger_levs = 0; mutex_lock(&data->lock); clk_enable(data->clk); @@ -585,6 +606,11 @@ static int exynos_tmu_initialize(struct platform_device *pdev) (data->temp_error2 != 0)) data->temp_error1 = pdata->efuse_value; + /* Count trigger levels to be enabled */ + for (i = 0; i < MAX_THRESHOLD_LEVS; i++) + if (pdata->trigger_levels[i]) + trigger_levs++; + if (data->soc == SOC_ARCH_EXYNOS4210) { /* Write temperature code for threshold */ threshold_code = temp_to_code(data, pdata->threshold); @@ -594,44 +620,38 @@ static int exynos_tmu_initialize(struct platform_device *pdev) } writeb(threshold_code, data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP); - - writeb(pdata->trigger_levels[0], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0); - writeb(pdata->trigger_levels[1], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL1); - writeb(pdata->trigger_levels[2], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL2); - writeb(pdata->trigger_levels[3], - data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL3); + for (i = 0; i < trigger_levs; i++) + writeb(pdata->trigger_levels[i], + data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4); writel(EXYNOS4210_TMU_INTCLEAR_VAL, data->base + EXYNOS_TMU_REG_INTCLEAR); } else if (data->soc == SOC_ARCH_EXYNOS) { - /* Write temperature code for threshold */ - threshold_code = temp_to_code(data, pdata->trigger_levels[0]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; - } - rising_threshold = threshold_code; - threshold_code = temp_to_code(data, pdata->trigger_levels[1]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; - } - rising_threshold |= (threshold_code << 8); - threshold_code = temp_to_code(data, pdata->trigger_levels[2]); - if (threshold_code < 0) { - ret = threshold_code; - goto out; + /* Write temperature code for rising and falling threshold */ + for (i = 0; i < trigger_levs; i++) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i]); + if (threshold_code < 0) { + ret = threshold_code; + goto out; + } + rising_threshold |= threshold_code << 8 * i; + if (pdata->threshold_falling) { + threshold_code = temp_to_code(data, + pdata->trigger_levels[i] - + pdata->threshold_falling); + if (threshold_code > 0) + falling_threshold |= + threshold_code << 8 * i; + } } - rising_threshold |= (threshold_code << 16); writel(rising_threshold, data->base + EXYNOS_THD_TEMP_RISE); - writel(0, data->base + EXYNOS_THD_TEMP_FALL); + writel(falling_threshold, + data->base + EXYNOS_THD_TEMP_FALL); - writel(EXYNOS_TMU_CLEAR_RISE_INT|EXYNOS_TMU_CLEAR_FALL_INT, + writel(EXYNOS_TMU_CLEAR_RISE_INT | EXYNOS_TMU_CLEAR_FALL_INT, data->base + EXYNOS_TMU_REG_INTCLEAR); } out: @@ -664,6 +684,8 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on) pdata->trigger_level2_en << 8 | pdata->trigger_level1_en << 4 | pdata->trigger_level0_en; + if (pdata->threshold_falling) + interrupt_en |= interrupt_en << 16; } else { con |= EXYNOS_TMU_CORE_OFF; interrupt_en = 0; /* Disable all interrupts */ @@ -697,20 +719,19 @@ static void exynos_tmu_work(struct work_struct *work) struct exynos_tmu_data *data = container_of(work, struct exynos_tmu_data, irq_work); + exynos_report_trigger(); mutex_lock(&data->lock); clk_enable(data->clk); - - if (data->soc == SOC_ARCH_EXYNOS) - writel(EXYNOS_TMU_CLEAR_RISE_INT, + writel(EXYNOS_TMU_CLEAR_RISE_INT | + EXYNOS_TMU_CLEAR_FALL_INT, data->base + EXYNOS_TMU_REG_INTCLEAR); else writel(EXYNOS4210_TMU_INTCLEAR_VAL, data->base + EXYNOS_TMU_REG_INTCLEAR); - clk_disable(data->clk); mutex_unlock(&data->lock); - exynos_report_trigger(); + enable_irq(data->irq); } @@ -759,6 +780,7 @@ static struct exynos_tmu_platform_data const exynos4210_default_tmu_data = { #if defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412) static struct exynos_tmu_platform_data const exynos_default_tmu_data = { + .threshold_falling = 10, .trigger_levels[0] = 85, .trigger_levels[1] = 103, .trigger_levels[2] = 110, @@ -800,8 +822,6 @@ static const struct of_device_id exynos_tmu_match[] = { {}, }; MODULE_DEVICE_TABLE(of, exynos_tmu_match); -#else -#define exynos_tmu_match NULL #endif static struct platform_device_id exynos_tmu_driver_ids[] = { @@ -832,6 +852,94 @@ static inline struct exynos_tmu_platform_data *exynos_get_driver_data( return (struct exynos_tmu_platform_data *) platform_get_device_id(pdev)->driver_data; } + +#ifdef CONFIG_EXYNOS_THERMAL_EMUL +static ssize_t exynos_tmu_emulation_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev = container_of(dev, + struct platform_device, dev); + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + unsigned int reg; + u8 temp_code; + int temp = 0; + + if (data->soc == SOC_ARCH_EXYNOS4210) + goto out; + + mutex_lock(&data->lock); + clk_enable(data->clk); + reg = readl(data->base + EXYNOS_EMUL_CON); + clk_disable(data->clk); + mutex_unlock(&data->lock); + + if (reg & EXYNOS_EMUL_ENABLE) { + reg >>= EXYNOS_EMUL_DATA_SHIFT; + temp_code = reg & EXYNOS_EMUL_DATA_MASK; + temp = code_to_temp(data, temp_code); + } +out: + return sprintf(buf, "%d\n", temp * MCELSIUS); +} + +static ssize_t exynos_tmu_emulation_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct platform_device *pdev = container_of(dev, + struct platform_device, dev); + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + unsigned int reg; + int temp; + + if (data->soc == SOC_ARCH_EXYNOS4210) + goto out; + + if (!sscanf(buf, "%d\n", &temp) || temp < 0) + return -EINVAL; + + mutex_lock(&data->lock); + clk_enable(data->clk); + + reg = readl(data->base + EXYNOS_EMUL_CON); + + if (temp) { + /* Both CELSIUS and MCELSIUS type are available for input */ + if (temp > MCELSIUS) + temp /= MCELSIUS; + + reg = (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT) | + (temp_to_code(data, (temp / MCELSIUS)) + << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE; + } else { + reg &= ~EXYNOS_EMUL_ENABLE; + } + + writel(reg, data->base + EXYNOS_EMUL_CON); + + clk_disable(data->clk); + mutex_unlock(&data->lock); + +out: + return count; +} + +static DEVICE_ATTR(emulation, 0644, exynos_tmu_emulation_show, + exynos_tmu_emulation_store); +static int create_emulation_sysfs(struct device *dev) +{ + return device_create_file(dev, &dev_attr_emulation); +} +static void remove_emulation_sysfs(struct device *dev) +{ + device_remove_file(dev, &dev_attr_emulation); +} +#else +static inline int create_emulation_sysfs(struct device *dev) { return 0; } +static inline void remove_emulation_sysfs(struct device *dev) {} +#endif + static int exynos_tmu_probe(struct platform_device *pdev) { struct exynos_tmu_data *data; @@ -914,6 +1022,8 @@ static int exynos_tmu_probe(struct platform_device *pdev) exynos_sensor_conf.trip_data.trip_val[i] = pdata->threshold + pdata->trigger_levels[i]; + exynos_sensor_conf.trip_data.trigger_falling = pdata->threshold_falling; + exynos_sensor_conf.cooling_data.freq_clip_count = pdata->freq_tab_count; for (i = 0; i < pdata->freq_tab_count; i++) { @@ -928,6 +1038,11 @@ static int exynos_tmu_probe(struct platform_device *pdev) dev_err(&pdev->dev, "Failed to register thermal interface\n"); goto err_clk; } + + ret = create_emulation_sysfs(&pdev->dev); + if (ret) + dev_err(&pdev->dev, "Failed to create emulation mode sysfs node\n"); + return 0; err_clk: platform_set_drvdata(pdev, NULL); @@ -939,6 +1054,8 @@ static int exynos_tmu_remove(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); + remove_emulation_sysfs(&pdev->dev); + exynos_tmu_control(pdev, false); exynos_unregister_thermal(); @@ -980,7 +1097,7 @@ static struct platform_driver exynos_tmu_driver = { .name = "exynos-tmu", .owner = THIS_MODULE, .pm = EXYNOS_TMU_PM, - .of_match_table = exynos_tmu_match, + .of_match_table = of_match_ptr(exynos_tmu_match), }, .probe = exynos_tmu_probe, .remove = exynos_tmu_remove, diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c new file mode 100644 index 00000000000..b40b37cd25e --- /dev/null +++ b/drivers/thermal/intel_powerclamp.c @@ -0,0 +1,795 @@ +/* + * intel_powerclamp.c - package c-state idle injection + * + * Copyright (c) 2012, Intel Corporation. + * + * Authors: + * Arjan van de Ven <arjan@linux.intel.com> + * Jacob Pan <jacob.jun.pan@linux.intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * + * TODO: + * 1. better handle wakeup from external interrupts, currently a fixed + * compensation is added to clamping duration when excessive amount + * of wakeups are observed during idle time. the reason is that in + * case of external interrupts without need for ack, clamping down + * cpu in non-irq context does not reduce irq. for majority of the + * cases, clamping down cpu does help reduce irq as well, we should + * be able to differenciate the two cases and give a quantitative + * solution for the irqs that we can control. perhaps based on + * get_cpu_iowait_time_us() + * + * 2. synchronization with other hw blocks + * + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/freezer.h> +#include <linux/cpu.h> +#include <linux/thermal.h> +#include <linux/slab.h> +#include <linux/tick.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/sched/rt.h> + +#include <asm/nmi.h> +#include <asm/msr.h> +#include <asm/mwait.h> +#include <asm/cpu_device_id.h> +#include <asm/idle.h> +#include <asm/hardirq.h> + +#define MAX_TARGET_RATIO (50U) +/* For each undisturbed clamping period (no extra wake ups during idle time), + * we increment the confidence counter for the given target ratio. + * CONFIDENCE_OK defines the level where runtime calibration results are + * valid. + */ +#define CONFIDENCE_OK (3) +/* Default idle injection duration, driver adjust sleep time to meet target + * idle ratio. Similar to frequency modulation. + */ +#define DEFAULT_DURATION_JIFFIES (6) + +static unsigned int target_mwait; +static struct dentry *debug_dir; + +/* user selected target */ +static unsigned int set_target_ratio; +static unsigned int current_ratio; +static bool should_skip; +static bool reduce_irq; +static atomic_t idle_wakeup_counter; +static unsigned int control_cpu; /* The cpu assigned to collect stat and update + * control parameters. default to BSP but BSP + * can be offlined. + */ +static bool clamping; + + +static struct task_struct * __percpu *powerclamp_thread; +static struct thermal_cooling_device *cooling_dev; +static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu + * clamping thread + */ + +static unsigned int duration; +static unsigned int pkg_cstate_ratio_cur; +static unsigned int window_size; + +static int duration_set(const char *arg, const struct kernel_param *kp) +{ + int ret = 0; + unsigned long new_duration; + + ret = kstrtoul(arg, 10, &new_duration); + if (ret) + goto exit; + if (new_duration > 25 || new_duration < 6) { + pr_err("Out of recommended range %lu, between 6-25ms\n", + new_duration); + ret = -EINVAL; + } + + duration = clamp(new_duration, 6ul, 25ul); + smp_mb(); + +exit: + + return ret; +} + +static struct kernel_param_ops duration_ops = { + .set = duration_set, + .get = param_get_int, +}; + + +module_param_cb(duration, &duration_ops, &duration, 0644); +MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec."); + +struct powerclamp_calibration_data { + unsigned long confidence; /* used for calibration, basically a counter + * gets incremented each time a clamping + * period is completed without extra wakeups + * once that counter is reached given level, + * compensation is deemed usable. + */ + unsigned long steady_comp; /* steady state compensation used when + * no extra wakeups occurred. + */ + unsigned long dynamic_comp; /* compensate excessive wakeup from idle + * mostly from external interrupts. + */ +}; + +static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO]; + +static int window_size_set(const char *arg, const struct kernel_param *kp) +{ + int ret = 0; + unsigned long new_window_size; + + ret = kstrtoul(arg, 10, &new_window_size); + if (ret) + goto exit_win; + if (new_window_size > 10 || new_window_size < 2) { + pr_err("Out of recommended window size %lu, between 2-10\n", + new_window_size); + ret = -EINVAL; + } + + window_size = clamp(new_window_size, 2ul, 10ul); + smp_mb(); + +exit_win: + + return ret; +} + +static struct kernel_param_ops window_size_ops = { + .set = window_size_set, + .get = param_get_int, +}; + +module_param_cb(window_size, &window_size_ops, &window_size, 0644); +MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n" + "\tpowerclamp controls idle ratio within this window. larger\n" + "\twindow size results in slower response time but more smooth\n" + "\tclamping results. default to 2."); + +static void find_target_mwait(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int highest_cstate = 0; + unsigned int highest_subcstate = 0; + int i; + + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); + + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || + !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) + return; + + edx >>= MWAIT_SUBSTATE_SIZE; + for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) { + if (edx & MWAIT_SUBSTATE_MASK) { + highest_cstate = i; + highest_subcstate = edx & MWAIT_SUBSTATE_MASK; + } + } + target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) | + (highest_subcstate - 1); + +} + +static u64 pkg_state_counter(void) +{ + u64 val; + u64 count = 0; + + static bool skip_c2; + static bool skip_c3; + static bool skip_c6; + static bool skip_c7; + + if (!skip_c2) { + if (!rdmsrl_safe(MSR_PKG_C2_RESIDENCY, &val)) + count += val; + else + skip_c2 = true; + } + + if (!skip_c3) { + if (!rdmsrl_safe(MSR_PKG_C3_RESIDENCY, &val)) + count += val; + else + skip_c3 = true; + } + + if (!skip_c6) { + if (!rdmsrl_safe(MSR_PKG_C6_RESIDENCY, &val)) + count += val; + else + skip_c6 = true; + } + + if (!skip_c7) { + if (!rdmsrl_safe(MSR_PKG_C7_RESIDENCY, &val)) + count += val; + else + skip_c7 = true; + } + + return count; +} + +static void noop_timer(unsigned long foo) +{ + /* empty... just the fact that we get the interrupt wakes us up */ +} + +static unsigned int get_compensation(int ratio) +{ + unsigned int comp = 0; + + /* we only use compensation if all adjacent ones are good */ + if (ratio == 1 && + cal_data[ratio].confidence >= CONFIDENCE_OK && + cal_data[ratio + 1].confidence >= CONFIDENCE_OK && + cal_data[ratio + 2].confidence >= CONFIDENCE_OK) { + comp = (cal_data[ratio].steady_comp + + cal_data[ratio + 1].steady_comp + + cal_data[ratio + 2].steady_comp) / 3; + } else if (ratio == MAX_TARGET_RATIO - 1 && + cal_data[ratio].confidence >= CONFIDENCE_OK && + cal_data[ratio - 1].confidence >= CONFIDENCE_OK && + cal_data[ratio - 2].confidence >= CONFIDENCE_OK) { + comp = (cal_data[ratio].steady_comp + + cal_data[ratio - 1].steady_comp + + cal_data[ratio - 2].steady_comp) / 3; + } else if (cal_data[ratio].confidence >= CONFIDENCE_OK && + cal_data[ratio - 1].confidence >= CONFIDENCE_OK && + cal_data[ratio + 1].confidence >= CONFIDENCE_OK) { + comp = (cal_data[ratio].steady_comp + + cal_data[ratio - 1].steady_comp + + cal_data[ratio + 1].steady_comp) / 3; + } + + /* REVISIT: simple penalty of double idle injection */ + if (reduce_irq) + comp = ratio; + /* do not exceed limit */ + if (comp + ratio >= MAX_TARGET_RATIO) + comp = MAX_TARGET_RATIO - ratio - 1; + + return comp; +} + +static void adjust_compensation(int target_ratio, unsigned int win) +{ + int delta; + struct powerclamp_calibration_data *d = &cal_data[target_ratio]; + + /* + * adjust compensations if confidence level has not been reached or + * there are too many wakeups during the last idle injection period, we + * cannot trust the data for compensation. + */ + if (d->confidence >= CONFIDENCE_OK || + atomic_read(&idle_wakeup_counter) > + win * num_online_cpus()) + return; + + delta = set_target_ratio - current_ratio; + /* filter out bad data */ + if (delta >= 0 && delta <= (1+target_ratio/10)) { + if (d->steady_comp) + d->steady_comp = + roundup(delta+d->steady_comp, 2)/2; + else + d->steady_comp = delta; + d->confidence++; + } +} + +static bool powerclamp_adjust_controls(unsigned int target_ratio, + unsigned int guard, unsigned int win) +{ + static u64 msr_last, tsc_last; + u64 msr_now, tsc_now; + u64 val64; + + /* check result for the last window */ + msr_now = pkg_state_counter(); + rdtscll(tsc_now); + + /* calculate pkg cstate vs tsc ratio */ + if (!msr_last || !tsc_last) + current_ratio = 1; + else if (tsc_now-tsc_last) { + val64 = 100*(msr_now-msr_last); + do_div(val64, (tsc_now-tsc_last)); + current_ratio = val64; + } + + /* update record */ + msr_last = msr_now; + tsc_last = tsc_now; + + adjust_compensation(target_ratio, win); + /* + * too many external interrupts, set flag such + * that we can take measure later. + */ + reduce_irq = atomic_read(&idle_wakeup_counter) >= + 2 * win * num_online_cpus(); + + atomic_set(&idle_wakeup_counter, 0); + /* if we are above target+guard, skip */ + return set_target_ratio + guard <= current_ratio; +} + +static int clamp_thread(void *arg) +{ + int cpunr = (unsigned long)arg; + DEFINE_TIMER(wakeup_timer, noop_timer, 0, 0); + static const struct sched_param param = { + .sched_priority = MAX_USER_RT_PRIO/2, + }; + unsigned int count = 0; + unsigned int target_ratio; + + set_bit(cpunr, cpu_clamping_mask); + set_freezable(); + init_timer_on_stack(&wakeup_timer); + sched_setscheduler(current, SCHED_FIFO, ¶m); + + while (true == clamping && !kthread_should_stop() && + cpu_online(cpunr)) { + int sleeptime; + unsigned long target_jiffies; + unsigned int guard; + unsigned int compensation = 0; + int interval; /* jiffies to sleep for each attempt */ + unsigned int duration_jiffies = msecs_to_jiffies(duration); + unsigned int window_size_now; + + try_to_freeze(); + /* + * make sure user selected ratio does not take effect until + * the next round. adjust target_ratio if user has changed + * target such that we can converge quickly. + */ + target_ratio = set_target_ratio; + guard = 1 + target_ratio/20; + window_size_now = window_size; + count++; + + /* + * systems may have different ability to enter package level + * c-states, thus we need to compensate the injected idle ratio + * to achieve the actual target reported by the HW. + */ + compensation = get_compensation(target_ratio); + interval = duration_jiffies*100/(target_ratio+compensation); + + /* align idle time */ + target_jiffies = roundup(jiffies, interval); + sleeptime = target_jiffies - jiffies; + if (sleeptime <= 0) + sleeptime = 1; + schedule_timeout_interruptible(sleeptime); + /* + * only elected controlling cpu can collect stats and update + * control parameters. + */ + if (cpunr == control_cpu && !(count%window_size_now)) { + should_skip = + powerclamp_adjust_controls(target_ratio, + guard, window_size_now); + smp_mb(); + } + + if (should_skip) + continue; + + target_jiffies = jiffies + duration_jiffies; + mod_timer(&wakeup_timer, target_jiffies); + if (unlikely(local_softirq_pending())) + continue; + /* + * stop tick sched during idle time, interrupts are still + * allowed. thus jiffies are updated properly. + */ + preempt_disable(); + tick_nohz_idle_enter(); + /* mwait until target jiffies is reached */ + while (time_before(jiffies, target_jiffies)) { + unsigned long ecx = 1; + unsigned long eax = target_mwait; + + /* + * REVISIT: may call enter_idle() to notify drivers who + * can save power during cpu idle. same for exit_idle() + */ + local_touch_nmi(); + stop_critical_timings(); + __monitor((void *)¤t_thread_info()->flags, 0, 0); + cpu_relax(); /* allow HT sibling to run */ + __mwait(eax, ecx); + start_critical_timings(); + atomic_inc(&idle_wakeup_counter); + } + tick_nohz_idle_exit(); + preempt_enable_no_resched(); + } + del_timer_sync(&wakeup_timer); + clear_bit(cpunr, cpu_clamping_mask); + + return 0; +} + +/* + * 1 HZ polling while clamping is active, useful for userspace + * to monitor actual idle ratio. + */ +static void poll_pkg_cstate(struct work_struct *dummy); +static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate); +static void poll_pkg_cstate(struct work_struct *dummy) +{ + static u64 msr_last; + static u64 tsc_last; + static unsigned long jiffies_last; + + u64 msr_now; + unsigned long jiffies_now; + u64 tsc_now; + u64 val64; + + msr_now = pkg_state_counter(); + rdtscll(tsc_now); + jiffies_now = jiffies; + + /* calculate pkg cstate vs tsc ratio */ + if (!msr_last || !tsc_last) + pkg_cstate_ratio_cur = 1; + else { + if (tsc_now - tsc_last) { + val64 = 100 * (msr_now - msr_last); + do_div(val64, (tsc_now - tsc_last)); + pkg_cstate_ratio_cur = val64; + } + } + + /* update record */ + msr_last = msr_now; + jiffies_last = jiffies_now; + tsc_last = tsc_now; + + if (true == clamping) + schedule_delayed_work(&poll_pkg_cstate_work, HZ); +} + +static int start_power_clamp(void) +{ + unsigned long cpu; + struct task_struct *thread; + + /* check if pkg cstate counter is completely 0, abort in this case */ + if (!pkg_state_counter()) { + pr_err("pkg cstate counter not functional, abort\n"); + return -EINVAL; + } + + set_target_ratio = clamp(set_target_ratio, 0U, MAX_TARGET_RATIO - 1); + /* prevent cpu hotplug */ + get_online_cpus(); + + /* prefer BSP */ + control_cpu = 0; + if (!cpu_online(control_cpu)) + control_cpu = smp_processor_id(); + + clamping = true; + schedule_delayed_work(&poll_pkg_cstate_work, 0); + + /* start one thread per online cpu */ + for_each_online_cpu(cpu) { + struct task_struct **p = + per_cpu_ptr(powerclamp_thread, cpu); + + thread = kthread_create_on_node(clamp_thread, + (void *) cpu, + cpu_to_node(cpu), + "kidle_inject/%ld", cpu); + /* bind to cpu here */ + if (likely(!IS_ERR(thread))) { + kthread_bind(thread, cpu); + wake_up_process(thread); + *p = thread; + } + + } + put_online_cpus(); + + return 0; +} + +static void end_power_clamp(void) +{ + int i; + struct task_struct *thread; + + clamping = false; + /* + * make clamping visible to other cpus and give per cpu clamping threads + * sometime to exit, or gets killed later. + */ + smp_mb(); + msleep(20); + if (bitmap_weight(cpu_clamping_mask, num_possible_cpus())) { + for_each_set_bit(i, cpu_clamping_mask, num_possible_cpus()) { + pr_debug("clamping thread for cpu %d alive, kill\n", i); + thread = *per_cpu_ptr(powerclamp_thread, i); + kthread_stop(thread); + } + } +} + +static int powerclamp_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned long cpu = (unsigned long)hcpu; + struct task_struct *thread; + struct task_struct **percpu_thread = + per_cpu_ptr(powerclamp_thread, cpu); + + if (false == clamping) + goto exit_ok; + + switch (action) { + case CPU_ONLINE: + thread = kthread_create_on_node(clamp_thread, + (void *) cpu, + cpu_to_node(cpu), + "kidle_inject/%lu", cpu); + if (likely(!IS_ERR(thread))) { + kthread_bind(thread, cpu); + wake_up_process(thread); + *percpu_thread = thread; + } + /* prefer BSP as controlling CPU */ + if (cpu == 0) { + control_cpu = 0; + smp_mb(); + } + break; + case CPU_DEAD: + if (test_bit(cpu, cpu_clamping_mask)) { + pr_err("cpu %lu dead but powerclamping thread is not\n", + cpu); + kthread_stop(*percpu_thread); + } + if (cpu == control_cpu) { + control_cpu = smp_processor_id(); + smp_mb(); + } + } + +exit_ok: + return NOTIFY_OK; +} + +static struct notifier_block powerclamp_cpu_notifier = { + .notifier_call = powerclamp_cpu_callback, +}; + +static int powerclamp_get_max_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + *state = MAX_TARGET_RATIO; + + return 0; +} + +static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev, + unsigned long *state) +{ + if (true == clamping) + *state = pkg_cstate_ratio_cur; + else + /* to save power, do not poll idle ratio while not clamping */ + *state = -1; /* indicates invalid state */ + + return 0; +} + +static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev, + unsigned long new_target_ratio) +{ + int ret = 0; + + new_target_ratio = clamp(new_target_ratio, 0UL, + (unsigned long) (MAX_TARGET_RATIO-1)); + if (set_target_ratio == 0 && new_target_ratio > 0) { + pr_info("Start idle injection to reduce power\n"); + set_target_ratio = new_target_ratio; + ret = start_power_clamp(); + goto exit_set; + } else if (set_target_ratio > 0 && new_target_ratio == 0) { + pr_info("Stop forced idle injection\n"); + set_target_ratio = 0; + end_power_clamp(); + } else /* adjust currently running */ { + set_target_ratio = new_target_ratio; + /* make new set_target_ratio visible to other cpus */ + smp_mb(); + } + +exit_set: + return ret; +} + +/* bind to generic thermal layer as cooling device*/ +static struct thermal_cooling_device_ops powerclamp_cooling_ops = { + .get_max_state = powerclamp_get_max_state, + .get_cur_state = powerclamp_get_cur_state, + .set_cur_state = powerclamp_set_cur_state, +}; + +/* runs on Nehalem and later */ +static const struct x86_cpu_id intel_powerclamp_ids[] = { + { X86_VENDOR_INTEL, 6, 0x1a}, + { X86_VENDOR_INTEL, 6, 0x1c}, + { X86_VENDOR_INTEL, 6, 0x1e}, + { X86_VENDOR_INTEL, 6, 0x1f}, + { X86_VENDOR_INTEL, 6, 0x25}, + { X86_VENDOR_INTEL, 6, 0x26}, + { X86_VENDOR_INTEL, 6, 0x2a}, + { X86_VENDOR_INTEL, 6, 0x2c}, + { X86_VENDOR_INTEL, 6, 0x2d}, + { X86_VENDOR_INTEL, 6, 0x2e}, + { X86_VENDOR_INTEL, 6, 0x2f}, + { X86_VENDOR_INTEL, 6, 0x3a}, + {} +}; +MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids); + +static int powerclamp_probe(void) +{ + if (!x86_match_cpu(intel_powerclamp_ids)) { + pr_err("Intel powerclamp does not run on family %d model %d\n", + boot_cpu_data.x86, boot_cpu_data.x86_model); + return -ENODEV; + } + if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || + !boot_cpu_has(X86_FEATURE_CONSTANT_TSC) || + !boot_cpu_has(X86_FEATURE_MWAIT) || + !boot_cpu_has(X86_FEATURE_ARAT)) + return -ENODEV; + + /* find the deepest mwait value */ + find_target_mwait(); + + return 0; +} + +static int powerclamp_debug_show(struct seq_file *m, void *unused) +{ + int i = 0; + + seq_printf(m, "controlling cpu: %d\n", control_cpu); + seq_printf(m, "pct confidence steady dynamic (compensation)\n"); + for (i = 0; i < MAX_TARGET_RATIO; i++) { + seq_printf(m, "%d\t%lu\t%lu\t%lu\n", + i, + cal_data[i].confidence, + cal_data[i].steady_comp, + cal_data[i].dynamic_comp); + } + + return 0; +} + +static int powerclamp_debug_open(struct inode *inode, + struct file *file) +{ + return single_open(file, powerclamp_debug_show, inode->i_private); +} + +static const struct file_operations powerclamp_debug_fops = { + .open = powerclamp_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static inline void powerclamp_create_debug_files(void) +{ + debug_dir = debugfs_create_dir("intel_powerclamp", NULL); + if (!debug_dir) + return; + + if (!debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, + cal_data, &powerclamp_debug_fops)) + goto file_error; + + return; + +file_error: + debugfs_remove_recursive(debug_dir); +} + +static int powerclamp_init(void) +{ + int retval; + int bitmap_size; + + bitmap_size = BITS_TO_LONGS(num_possible_cpus()) * sizeof(long); + cpu_clamping_mask = kzalloc(bitmap_size, GFP_KERNEL); + if (!cpu_clamping_mask) + return -ENOMEM; + + /* probe cpu features and ids here */ + retval = powerclamp_probe(); + if (retval) + return retval; + /* set default limit, maybe adjusted during runtime based on feedback */ + window_size = 2; + register_hotcpu_notifier(&powerclamp_cpu_notifier); + powerclamp_thread = alloc_percpu(struct task_struct *); + cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL, + &powerclamp_cooling_ops); + if (IS_ERR(cooling_dev)) + return -ENODEV; + + if (!duration) + duration = jiffies_to_msecs(DEFAULT_DURATION_JIFFIES); + powerclamp_create_debug_files(); + + return 0; +} +module_init(powerclamp_init); + +static void powerclamp_exit(void) +{ + unregister_hotcpu_notifier(&powerclamp_cpu_notifier); + end_power_clamp(); + free_percpu(powerclamp_thread); + thermal_cooling_device_unregister(cooling_dev); + kfree(cpu_clamping_mask); + + cancel_delayed_work_sync(&poll_pkg_cstate_work); + debugfs_remove_recursive(debug_dir); +} +module_exit(powerclamp_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>"); +MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>"); +MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs"); diff --git a/drivers/thermal/kirkwood_thermal.c b/drivers/thermal/kirkwood_thermal.c new file mode 100644 index 00000000000..65cb4f09e8f --- /dev/null +++ b/drivers/thermal/kirkwood_thermal.c @@ -0,0 +1,134 @@ +/* + * Kirkwood thermal sensor driver + * + * Copyright (C) 2012 Nobuhiro Iwamatsu <iwamatsu@nigauri.org> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#include <linux/device.h> +#include <linux/err.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/thermal.h> + +#define KIRKWOOD_THERMAL_VALID_OFFSET 9 +#define KIRKWOOD_THERMAL_VALID_MASK 0x1 +#define KIRKWOOD_THERMAL_TEMP_OFFSET 10 +#define KIRKWOOD_THERMAL_TEMP_MASK 0x1FF + +/* Kirkwood Thermal Sensor Dev Structure */ +struct kirkwood_thermal_priv { + void __iomem *sensor; +}; + +static int kirkwood_get_temp(struct thermal_zone_device *thermal, + unsigned long *temp) +{ + unsigned long reg; + struct kirkwood_thermal_priv *priv = thermal->devdata; + + reg = readl_relaxed(priv->sensor); + + /* Valid check */ + if (!(reg >> KIRKWOOD_THERMAL_VALID_OFFSET) & + KIRKWOOD_THERMAL_VALID_MASK) { + dev_err(&thermal->device, + "Temperature sensor reading not valid\n"); + return -EIO; + } + + /* + * Calculate temperature. See Section 8.10.1 of the 88AP510, + * datasheet, which has the same sensor. + * Documentation/arm/Marvell/README + */ + reg = (reg >> KIRKWOOD_THERMAL_TEMP_OFFSET) & + KIRKWOOD_THERMAL_TEMP_MASK; + *temp = ((2281638UL - (7298*reg)) / 10); + + return 0; +} + +static struct thermal_zone_device_ops ops = { + .get_temp = kirkwood_get_temp, +}; + +static const struct of_device_id kirkwood_thermal_id_table[] = { + { .compatible = "marvell,kirkwood-thermal" }, + {} +}; + +static int kirkwood_thermal_probe(struct platform_device *pdev) +{ + struct thermal_zone_device *thermal = NULL; + struct kirkwood_thermal_priv *priv; + struct resource *res; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&pdev->dev, "Failed to get platform resource\n"); + return -ENODEV; + } + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->sensor = devm_request_and_ioremap(&pdev->dev, res); + if (!priv->sensor) { + dev_err(&pdev->dev, "Failed to request_ioremap memory\n"); + return -EADDRNOTAVAIL; + } + + thermal = thermal_zone_device_register("kirkwood_thermal", 0, 0, + priv, &ops, NULL, 0, 0); + if (IS_ERR(thermal)) { + dev_err(&pdev->dev, + "Failed to register thermal zone device\n"); + return PTR_ERR(thermal); + } + + platform_set_drvdata(pdev, thermal); + + return 0; +} + +static int kirkwood_thermal_exit(struct platform_device *pdev) +{ + struct thermal_zone_device *kirkwood_thermal = + platform_get_drvdata(pdev); + + thermal_zone_device_unregister(kirkwood_thermal); + platform_set_drvdata(pdev, NULL); + + return 0; +} + +MODULE_DEVICE_TABLE(of, kirkwood_thermal_id_table); + +static struct platform_driver kirkwood_thermal_driver = { + .probe = kirkwood_thermal_probe, + .remove = kirkwood_thermal_exit, + .driver = { + .name = "kirkwood_thermal", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(kirkwood_thermal_id_table), + }, +}; + +module_platform_driver(kirkwood_thermal_driver); + +MODULE_AUTHOR("Nobuhiro Iwamatsu <iwamatsu@nigauri.org>"); +MODULE_DESCRIPTION("kirkwood thermal driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/thermal/rcar_thermal.c b/drivers/thermal/rcar_thermal.c index 90db951725d..28f09199401 100644 --- a/drivers/thermal/rcar_thermal.c +++ b/drivers/thermal/rcar_thermal.c @@ -19,225 +19,473 @@ */ #include <linux/delay.h> #include <linux/err.h> +#include <linux/irq.h> +#include <linux/interrupt.h> #include <linux/io.h> #include <linux/module.h> #include <linux/platform_device.h> +#include <linux/reboot.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/thermal.h> -#define THSCR 0x2c -#define THSSR 0x30 +#define IDLE_INTERVAL 5000 + +#define COMMON_STR 0x00 +#define COMMON_ENR 0x04 +#define COMMON_INTMSK 0x0c + +#define REG_POSNEG 0x20 +#define REG_FILONOFF 0x28 +#define REG_THSCR 0x2c +#define REG_THSSR 0x30 +#define REG_INTCTRL 0x34 /* THSCR */ -#define CPTAP 0xf +#define CPCTL (1 << 12) /* THSSR */ #define CTEMP 0x3f - -struct rcar_thermal_priv { +struct rcar_thermal_common { void __iomem *base; struct device *dev; + struct list_head head; spinlock_t lock; - u32 comp; }; +struct rcar_thermal_priv { + void __iomem *base; + struct rcar_thermal_common *common; + struct thermal_zone_device *zone; + struct delayed_work work; + struct mutex lock; + struct list_head list; + int id; + int ctemp; +}; + +#define rcar_thermal_for_each_priv(pos, common) \ + list_for_each_entry(pos, &common->head, list) + #define MCELSIUS(temp) ((temp) * 1000) -#define rcar_zone_to_priv(zone) (zone->devdata) +#define rcar_zone_to_priv(zone) ((zone)->devdata) +#define rcar_priv_to_dev(priv) ((priv)->common->dev) +#define rcar_has_irq_support(priv) ((priv)->common->base) +#define rcar_id_to_shift(priv) ((priv)->id * 8) + +#ifdef DEBUG +# define rcar_force_update_temp(priv) 1 +#else +# define rcar_force_update_temp(priv) 0 +#endif /* * basic functions */ -static u32 rcar_thermal_read(struct rcar_thermal_priv *priv, u32 reg) +#define rcar_thermal_common_read(c, r) \ + _rcar_thermal_common_read(c, COMMON_ ##r) +static u32 _rcar_thermal_common_read(struct rcar_thermal_common *common, + u32 reg) { - unsigned long flags; - u32 ret; - - spin_lock_irqsave(&priv->lock, flags); + return ioread32(common->base + reg); +} - ret = ioread32(priv->base + reg); +#define rcar_thermal_common_write(c, r, d) \ + _rcar_thermal_common_write(c, COMMON_ ##r, d) +static void _rcar_thermal_common_write(struct rcar_thermal_common *common, + u32 reg, u32 data) +{ + iowrite32(data, common->base + reg); +} - spin_unlock_irqrestore(&priv->lock, flags); +#define rcar_thermal_common_bset(c, r, m, d) \ + _rcar_thermal_common_bset(c, COMMON_ ##r, m, d) +static void _rcar_thermal_common_bset(struct rcar_thermal_common *common, + u32 reg, u32 mask, u32 data) +{ + u32 val; - return ret; + val = ioread32(common->base + reg); + val &= ~mask; + val |= (data & mask); + iowrite32(val, common->base + reg); } -#if 0 /* no user at this point */ -static void rcar_thermal_write(struct rcar_thermal_priv *priv, - u32 reg, u32 data) +#define rcar_thermal_read(p, r) _rcar_thermal_read(p, REG_ ##r) +static u32 _rcar_thermal_read(struct rcar_thermal_priv *priv, u32 reg) { - unsigned long flags; - - spin_lock_irqsave(&priv->lock, flags); + return ioread32(priv->base + reg); +} +#define rcar_thermal_write(p, r, d) _rcar_thermal_write(p, REG_ ##r, d) +static void _rcar_thermal_write(struct rcar_thermal_priv *priv, + u32 reg, u32 data) +{ iowrite32(data, priv->base + reg); - - spin_unlock_irqrestore(&priv->lock, flags); } -#endif -static void rcar_thermal_bset(struct rcar_thermal_priv *priv, u32 reg, - u32 mask, u32 data) +#define rcar_thermal_bset(p, r, m, d) _rcar_thermal_bset(p, REG_ ##r, m, d) +static void _rcar_thermal_bset(struct rcar_thermal_priv *priv, u32 reg, + u32 mask, u32 data) { - unsigned long flags; u32 val; - spin_lock_irqsave(&priv->lock, flags); - val = ioread32(priv->base + reg); val &= ~mask; val |= (data & mask); iowrite32(val, priv->base + reg); - - spin_unlock_irqrestore(&priv->lock, flags); } /* * zone device functions */ -static int rcar_thermal_get_temp(struct thermal_zone_device *zone, - unsigned long *temp) +static int rcar_thermal_update_temp(struct rcar_thermal_priv *priv) { - struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); - int val, min, max, tmp; - - tmp = -200; /* default */ - while (1) { - if (priv->comp < 1 || priv->comp > 12) { - dev_err(priv->dev, - "THSSR invalid data (%d)\n", priv->comp); - priv->comp = 4; /* for next thermal */ - return -EINVAL; - } + struct device *dev = rcar_priv_to_dev(priv); + int i; + int ctemp, old, new; - /* - * THS comparator offset and the reference temperature - * - * Comparator | reference | Temperature field - * offset | temperature | measurement - * | (degrees C) | (degrees C) - * -------------+---------------+------------------- - * 1 | -45 | -45 to -30 - * 2 | -30 | -30 to -15 - * 3 | -15 | -15 to 0 - * 4 | 0 | 0 to +15 - * 5 | +15 | +15 to +30 - * 6 | +30 | +30 to +45 - * 7 | +45 | +45 to +60 - * 8 | +60 | +60 to +75 - * 9 | +75 | +75 to +90 - * 10 | +90 | +90 to +105 - * 11 | +105 | +105 to +120 - * 12 | +120 | +120 to +135 - */ + mutex_lock(&priv->lock); - /* calculate thermal limitation */ - min = (priv->comp * 15) - 60; - max = min + 15; + /* + * TSC decides a value of CPTAP automatically, + * and this is the conditions which validate interrupt. + */ + rcar_thermal_bset(priv, THSCR, CPCTL, CPCTL); + ctemp = 0; + old = ~0; + for (i = 0; i < 128; i++) { /* * we need to wait 300us after changing comparator offset * to get stable temperature. * see "Usage Notes" on datasheet */ - rcar_thermal_bset(priv, THSCR, CPTAP, priv->comp); udelay(300); - /* calculate current temperature */ - val = rcar_thermal_read(priv, THSSR) & CTEMP; - val = (val * 5) - 65; + new = rcar_thermal_read(priv, THSSR) & CTEMP; + if (new == old) { + ctemp = new; + break; + } + old = new; + } - dev_dbg(priv->dev, "comp/min/max/val = %d/%d/%d/%d\n", - priv->comp, min, max, val); + if (!ctemp) { + dev_err(dev, "thermal sensor was broken\n"); + return -EINVAL; + } - /* - * If val is same as min/max, then, - * it should try again on next comparator. - * But the val might be correct temperature. - * Keep it on "tmp" and compare with next val. - */ - if (tmp == val) - break; + /* + * enable IRQ + */ + if (rcar_has_irq_support(priv)) { + rcar_thermal_write(priv, FILONOFF, 0); - if (val <= min) { - tmp = min; - priv->comp--; /* try again */ - } else if (val >= max) { - tmp = max; - priv->comp++; /* try again */ - } else { - tmp = val; - break; - } + /* enable Rising/Falling edge interrupt */ + rcar_thermal_write(priv, POSNEG, 0x1); + rcar_thermal_write(priv, INTCTRL, (((ctemp - 0) << 8) | + ((ctemp - 1) << 0))); + } + + dev_dbg(dev, "thermal%d %d -> %d\n", priv->id, priv->ctemp, ctemp); + + priv->ctemp = ctemp; + + mutex_unlock(&priv->lock); + + return 0; +} + +static int rcar_thermal_get_temp(struct thermal_zone_device *zone, + unsigned long *temp) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + + if (!rcar_has_irq_support(priv) || rcar_force_update_temp(priv)) + rcar_thermal_update_temp(priv); + + mutex_lock(&priv->lock); + *temp = MCELSIUS((priv->ctemp * 5) - 65); + mutex_unlock(&priv->lock); + + return 0; +} + +static int rcar_thermal_get_trip_type(struct thermal_zone_device *zone, + int trip, enum thermal_trip_type *type) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + struct device *dev = rcar_priv_to_dev(priv); + + /* see rcar_thermal_get_temp() */ + switch (trip) { + case 0: /* +90 <= temp */ + *type = THERMAL_TRIP_CRITICAL; + break; + default: + dev_err(dev, "rcar driver trip error\n"); + return -EINVAL; + } + + return 0; +} + +static int rcar_thermal_get_trip_temp(struct thermal_zone_device *zone, + int trip, unsigned long *temp) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + struct device *dev = rcar_priv_to_dev(priv); + + /* see rcar_thermal_get_temp() */ + switch (trip) { + case 0: /* +90 <= temp */ + *temp = MCELSIUS(90); + break; + default: + dev_err(dev, "rcar driver trip error\n"); + return -EINVAL; + } + + return 0; +} + +static int rcar_thermal_notify(struct thermal_zone_device *zone, + int trip, enum thermal_trip_type type) +{ + struct rcar_thermal_priv *priv = rcar_zone_to_priv(zone); + struct device *dev = rcar_priv_to_dev(priv); + + switch (type) { + case THERMAL_TRIP_CRITICAL: + /* FIXME */ + dev_warn(dev, "Thermal reached to critical temperature\n"); + break; + default: + break; } - *temp = MCELSIUS(tmp); return 0; } static struct thermal_zone_device_ops rcar_thermal_zone_ops = { - .get_temp = rcar_thermal_get_temp, + .get_temp = rcar_thermal_get_temp, + .get_trip_type = rcar_thermal_get_trip_type, + .get_trip_temp = rcar_thermal_get_trip_temp, + .notify = rcar_thermal_notify, }; /* - * platform functions + * interrupt */ -static int rcar_thermal_probe(struct platform_device *pdev) +#define rcar_thermal_irq_enable(p) _rcar_thermal_irq_ctrl(p, 1) +#define rcar_thermal_irq_disable(p) _rcar_thermal_irq_ctrl(p, 0) +static void _rcar_thermal_irq_ctrl(struct rcar_thermal_priv *priv, int enable) +{ + struct rcar_thermal_common *common = priv->common; + unsigned long flags; + u32 mask = 0x3 << rcar_id_to_shift(priv); /* enable Rising/Falling */ + + spin_lock_irqsave(&common->lock, flags); + + rcar_thermal_common_bset(common, INTMSK, mask, enable ? 0 : mask); + + spin_unlock_irqrestore(&common->lock, flags); +} + +static void rcar_thermal_work(struct work_struct *work) { - struct thermal_zone_device *zone; struct rcar_thermal_priv *priv; - struct resource *res; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "Could not get platform resource\n"); - return -ENODEV; + priv = container_of(work, struct rcar_thermal_priv, work.work); + + rcar_thermal_update_temp(priv); + rcar_thermal_irq_enable(priv); + thermal_zone_device_update(priv->zone); +} + +static u32 rcar_thermal_had_changed(struct rcar_thermal_priv *priv, u32 status) +{ + struct device *dev = rcar_priv_to_dev(priv); + + status = (status >> rcar_id_to_shift(priv)) & 0x3; + + if (status & 0x3) { + dev_dbg(dev, "thermal%d %s%s\n", + priv->id, + (status & 0x2) ? "Rising " : "", + (status & 0x1) ? "Falling" : ""); } - priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); - if (!priv) { - dev_err(&pdev->dev, "Could not allocate priv\n"); - return -ENOMEM; + return status; +} + +static irqreturn_t rcar_thermal_irq(int irq, void *data) +{ + struct rcar_thermal_common *common = data; + struct rcar_thermal_priv *priv; + unsigned long flags; + u32 status, mask; + + spin_lock_irqsave(&common->lock, flags); + + mask = rcar_thermal_common_read(common, INTMSK); + status = rcar_thermal_common_read(common, STR); + rcar_thermal_common_write(common, STR, 0x000F0F0F & mask); + + spin_unlock_irqrestore(&common->lock, flags); + + status = status & ~mask; + + /* + * check the status + */ + rcar_thermal_for_each_priv(priv, common) { + if (rcar_thermal_had_changed(priv, status)) { + rcar_thermal_irq_disable(priv); + schedule_delayed_work(&priv->work, + msecs_to_jiffies(300)); + } } - priv->comp = 4; /* basic setup */ - priv->dev = &pdev->dev; - spin_lock_init(&priv->lock); - priv->base = devm_ioremap_nocache(&pdev->dev, - res->start, resource_size(res)); - if (!priv->base) { - dev_err(&pdev->dev, "Unable to ioremap thermal register\n"); + return IRQ_HANDLED; +} + +/* + * platform functions + */ +static int rcar_thermal_probe(struct platform_device *pdev) +{ + struct rcar_thermal_common *common; + struct rcar_thermal_priv *priv; + struct device *dev = &pdev->dev; + struct resource *res, *irq; + int mres = 0; + int i; + int idle = IDLE_INTERVAL; + + common = devm_kzalloc(dev, sizeof(*common), GFP_KERNEL); + if (!common) { + dev_err(dev, "Could not allocate common\n"); return -ENOMEM; } - zone = thermal_zone_device_register("rcar_thermal", 0, 0, priv, - &rcar_thermal_zone_ops, NULL, 0, 0); - if (IS_ERR(zone)) { - dev_err(&pdev->dev, "thermal zone device is NULL\n"); - return PTR_ERR(zone); + INIT_LIST_HEAD(&common->head); + spin_lock_init(&common->lock); + common->dev = dev; + + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (irq) { + int ret; + + /* + * platform has IRQ support. + * Then, drier use common register + */ + res = platform_get_resource(pdev, IORESOURCE_MEM, mres++); + if (!res) { + dev_err(dev, "Could not get platform resource\n"); + return -ENODEV; + } + + ret = devm_request_irq(dev, irq->start, rcar_thermal_irq, 0, + dev_name(dev), common); + if (ret) { + dev_err(dev, "irq request failed\n "); + return ret; + } + + /* + * rcar_has_irq_support() will be enabled + */ + common->base = devm_request_and_ioremap(dev, res); + if (!common->base) { + dev_err(dev, "Unable to ioremap thermal register\n"); + return -ENOMEM; + } + + /* enable temperature comparation */ + rcar_thermal_common_write(common, ENR, 0x00030303); + + idle = 0; /* polling delaye is not needed */ } - platform_set_drvdata(pdev, zone); + for (i = 0;; i++) { + res = platform_get_resource(pdev, IORESOURCE_MEM, mres++); + if (!res) + break; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) { + dev_err(dev, "Could not allocate priv\n"); + return -ENOMEM; + } + + priv->base = devm_request_and_ioremap(dev, res); + if (!priv->base) { + dev_err(dev, "Unable to ioremap priv register\n"); + return -ENOMEM; + } - dev_info(&pdev->dev, "proved\n"); + priv->common = common; + priv->id = i; + mutex_init(&priv->lock); + INIT_LIST_HEAD(&priv->list); + INIT_DELAYED_WORK(&priv->work, rcar_thermal_work); + rcar_thermal_update_temp(priv); + + priv->zone = thermal_zone_device_register("rcar_thermal", + 1, 0, priv, + &rcar_thermal_zone_ops, NULL, 0, + idle); + if (IS_ERR(priv->zone)) { + dev_err(dev, "can't register thermal zone\n"); + goto error_unregister; + } + + list_move_tail(&priv->list, &common->head); + + if (rcar_has_irq_support(priv)) + rcar_thermal_irq_enable(priv); + } + + platform_set_drvdata(pdev, common); + + dev_info(dev, "%d sensor proved\n", i); return 0; + +error_unregister: + rcar_thermal_for_each_priv(priv, common) + thermal_zone_device_unregister(priv->zone); + + return -ENODEV; } static int rcar_thermal_remove(struct platform_device *pdev) { - struct thermal_zone_device *zone = platform_get_drvdata(pdev); + struct rcar_thermal_common *common = platform_get_drvdata(pdev); + struct rcar_thermal_priv *priv; + + rcar_thermal_for_each_priv(priv, common) + thermal_zone_device_unregister(priv->zone); - thermal_zone_device_unregister(zone); platform_set_drvdata(pdev, NULL); return 0; } +static const struct of_device_id rcar_thermal_dt_ids[] = { + { .compatible = "renesas,rcar-thermal", }, + {}, +}; +MODULE_DEVICE_TABLE(of, rcar_thermal_dt_ids); + static struct platform_driver rcar_thermal_driver = { .driver = { .name = "rcar_thermal", + .of_match_table = rcar_thermal_dt_ids, }, .probe = rcar_thermal_probe, .remove = rcar_thermal_remove, diff --git a/drivers/thermal/spear_thermal.c b/drivers/thermal/spear_thermal.c index 6b2d8b21aae..3c5ee560797 100644 --- a/drivers/thermal/spear_thermal.c +++ b/drivers/thermal/spear_thermal.c @@ -131,7 +131,7 @@ static int spear_thermal_probe(struct platform_device *pdev) return -ENOMEM; } - stdev->clk = clk_get(&pdev->dev, NULL); + stdev->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(stdev->clk)) { dev_err(&pdev->dev, "Can't get clock\n"); return PTR_ERR(stdev->clk); @@ -140,7 +140,7 @@ static int spear_thermal_probe(struct platform_device *pdev) ret = clk_enable(stdev->clk); if (ret) { dev_err(&pdev->dev, "Can't enable clock\n"); - goto put_clk; + return ret; } stdev->flags = val; @@ -163,8 +163,6 @@ static int spear_thermal_probe(struct platform_device *pdev) disable_clk: clk_disable(stdev->clk); -put_clk: - clk_put(stdev->clk); return ret; } @@ -183,7 +181,6 @@ static int spear_thermal_exit(struct platform_device *pdev) writel_relaxed(actual_mask & ~stdev->flags, stdev->thermal_base); clk_disable(stdev->clk); - clk_put(stdev->clk); return 0; } diff --git a/drivers/thermal/step_wise.c b/drivers/thermal/step_wise.c index 0cd5e9fbab1..407cde3211c 100644 --- a/drivers/thermal/step_wise.c +++ b/drivers/thermal/step_wise.c @@ -35,21 +35,54 @@ * state for this trip point * b. if the trend is THERMAL_TREND_DROPPING, use lower cooling * state for this trip point + * c. if the trend is THERMAL_TREND_RAISE_FULL, use upper limit + * for this trip point + * d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit + * for this trip point + * If the temperature is lower than a trip point, + * a. if the trend is THERMAL_TREND_RAISING, do nothing + * b. if the trend is THERMAL_TREND_DROPPING, use lower cooling + * state for this trip point, if the cooling state already + * equals lower limit, deactivate the thermal instance + * c. if the trend is THERMAL_TREND_RAISE_FULL, do nothing + * d. if the trend is THERMAL_TREND_DROP_FULL, use lower limit, + * if the cooling state already equals lower limit, + * deactive the thermal instance */ static unsigned long get_target_state(struct thermal_instance *instance, - enum thermal_trend trend) + enum thermal_trend trend, bool throttle) { struct thermal_cooling_device *cdev = instance->cdev; unsigned long cur_state; cdev->ops->get_cur_state(cdev, &cur_state); - if (trend == THERMAL_TREND_RAISING) { - cur_state = cur_state < instance->upper ? - (cur_state + 1) : instance->upper; - } else if (trend == THERMAL_TREND_DROPPING) { - cur_state = cur_state > instance->lower ? - (cur_state - 1) : instance->lower; + switch (trend) { + case THERMAL_TREND_RAISING: + if (throttle) + cur_state = cur_state < instance->upper ? + (cur_state + 1) : instance->upper; + break; + case THERMAL_TREND_RAISE_FULL: + if (throttle) + cur_state = instance->upper; + break; + case THERMAL_TREND_DROPPING: + if (cur_state == instance->lower) { + if (!throttle) + cur_state = -1; + } else + cur_state -= 1; + break; + case THERMAL_TREND_DROP_FULL: + if (cur_state == instance->lower) { + if (!throttle) + cur_state = -1; + } else + cur_state = instance->lower; + break; + default: + break; } return cur_state; @@ -66,57 +99,14 @@ static void update_passive_instance(struct thermal_zone_device *tz, tz->passive += value; } -static void update_instance_for_throttle(struct thermal_zone_device *tz, - int trip, enum thermal_trip_type trip_type, - enum thermal_trend trend) -{ - struct thermal_instance *instance; - - list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip) - continue; - - instance->target = get_target_state(instance, trend); - - /* Activate a passive thermal instance */ - if (instance->target == THERMAL_NO_TARGET) - update_passive_instance(tz, trip_type, 1); - - instance->cdev->updated = false; /* cdev needs update */ - } -} - -static void update_instance_for_dethrottle(struct thermal_zone_device *tz, - int trip, enum thermal_trip_type trip_type) -{ - struct thermal_instance *instance; - struct thermal_cooling_device *cdev; - unsigned long cur_state; - - list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip || - instance->target == THERMAL_NO_TARGET) - continue; - - cdev = instance->cdev; - cdev->ops->get_cur_state(cdev, &cur_state); - - instance->target = cur_state > instance->lower ? - (cur_state - 1) : THERMAL_NO_TARGET; - - /* Deactivate a passive thermal instance */ - if (instance->target == THERMAL_NO_TARGET) - update_passive_instance(tz, trip_type, -1); - - cdev->updated = false; /* cdev needs update */ - } -} - static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) { long trip_temp; enum thermal_trip_type trip_type; enum thermal_trend trend; + struct thermal_instance *instance; + bool throttle = false; + int old_target; if (trip == THERMAL_TRIPS_NONE) { trip_temp = tz->forced_passive; @@ -128,12 +118,30 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip) trend = get_tz_trend(tz, trip); + if (tz->temperature >= trip_temp) + throttle = true; + mutex_lock(&tz->lock); - if (tz->temperature >= trip_temp) - update_instance_for_throttle(tz, trip, trip_type, trend); - else - update_instance_for_dethrottle(tz, trip, trip_type); + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if (instance->trip != trip) + continue; + + old_target = instance->target; + instance->target = get_target_state(instance, trend, throttle); + + /* Activate a passive thermal instance */ + if (old_target == THERMAL_NO_TARGET && + instance->target != THERMAL_NO_TARGET) + update_passive_instance(tz, trip_type, 1); + /* Deactivate a passive thermal instance */ + else if (old_target != THERMAL_NO_TARGET && + instance->target == THERMAL_NO_TARGET) + update_passive_instance(tz, trip_type, -1); + + + instance->cdev->updated = false; /* cdev needs update */ + } mutex_unlock(&tz->lock); } diff --git a/drivers/thermal/thermal_sys.c b/drivers/thermal/thermal_sys.c index 84e95f32cdb..5b7863a03f9 100644 --- a/drivers/thermal/thermal_sys.c +++ b/drivers/thermal/thermal_sys.c @@ -32,7 +32,6 @@ #include <linux/kdev_t.h> #include <linux/idr.h> #include <linux/thermal.h> -#include <linux/spinlock.h> #include <linux/reboot.h> #include <net/netlink.h> #include <net/genetlink.h> @@ -348,8 +347,9 @@ static void handle_critical_trips(struct thermal_zone_device *tz, tz->ops->notify(tz, trip, trip_type); if (trip_type == THERMAL_TRIP_CRITICAL) { - pr_emerg("Critical temperature reached(%d C),shutting down\n", - tz->temperature / 1000); + dev_emerg(&tz->device, + "critical temperature reached(%d C),shutting down\n", + tz->temperature / 1000); orderly_poweroff(true); } } @@ -371,23 +371,57 @@ static void handle_thermal_trip(struct thermal_zone_device *tz, int trip) monitor_thermal_zone(tz); } +static int thermal_zone_get_temp(struct thermal_zone_device *tz, + unsigned long *temp) +{ + int ret = 0; +#ifdef CONFIG_THERMAL_EMULATION + int count; + unsigned long crit_temp = -1UL; + enum thermal_trip_type type; +#endif + + mutex_lock(&tz->lock); + + ret = tz->ops->get_temp(tz, temp); +#ifdef CONFIG_THERMAL_EMULATION + if (!tz->emul_temperature) + goto skip_emul; + + for (count = 0; count < tz->trips; count++) { + ret = tz->ops->get_trip_type(tz, count, &type); + if (!ret && type == THERMAL_TRIP_CRITICAL) { + ret = tz->ops->get_trip_temp(tz, count, &crit_temp); + break; + } + } + + if (ret) + goto skip_emul; + + if (*temp < crit_temp) + *temp = tz->emul_temperature; +skip_emul: +#endif + mutex_unlock(&tz->lock); + return ret; +} + static void update_temperature(struct thermal_zone_device *tz) { long temp; int ret; - mutex_lock(&tz->lock); - - ret = tz->ops->get_temp(tz, &temp); + ret = thermal_zone_get_temp(tz, &temp); if (ret) { - pr_warn("failed to read out thermal zone %d\n", tz->id); - goto exit; + dev_warn(&tz->device, "failed to read out thermal zone %d\n", + tz->id); + return; } + mutex_lock(&tz->lock); tz->last_temperature = tz->temperature; tz->temperature = temp; - -exit: mutex_unlock(&tz->lock); } @@ -430,10 +464,7 @@ temp_show(struct device *dev, struct device_attribute *attr, char *buf) long temperature; int ret; - if (!tz->ops->get_temp) - return -EPERM; - - ret = tz->ops->get_temp(tz, &temperature); + ret = thermal_zone_get_temp(tz, &temperature); if (ret) return ret; @@ -693,6 +724,31 @@ policy_show(struct device *dev, struct device_attribute *devattr, char *buf) return sprintf(buf, "%s\n", tz->governor->name); } +#ifdef CONFIG_THERMAL_EMULATION +static ssize_t +emul_temp_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct thermal_zone_device *tz = to_thermal_zone(dev); + int ret = 0; + unsigned long temperature; + + if (kstrtoul(buf, 10, &temperature)) + return -EINVAL; + + if (!tz->ops->set_emul_temp) { + mutex_lock(&tz->lock); + tz->emul_temperature = temperature; + mutex_unlock(&tz->lock); + } else { + ret = tz->ops->set_emul_temp(tz, temperature); + } + + return ret ? ret : count; +} +static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store); +#endif/*CONFIG_THERMAL_EMULATION*/ + static DEVICE_ATTR(type, 0444, type_show, NULL); static DEVICE_ATTR(temp, 0444, temp_show, NULL); static DEVICE_ATTR(mode, 0644, mode_show, mode_store); @@ -835,7 +891,7 @@ temp_input_show(struct device *dev, struct device_attribute *attr, char *buf) temp_input); struct thermal_zone_device *tz = temp->tz; - ret = tz->ops->get_temp(tz, &temperature); + ret = thermal_zone_get_temp(tz, &temperature); if (ret) return ret; @@ -1522,6 +1578,9 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, if (!ops || !ops->get_temp) return ERR_PTR(-EINVAL); + if (trips > 0 && !ops->get_trip_type) + return ERR_PTR(-EINVAL); + tz = kzalloc(sizeof(struct thermal_zone_device), GFP_KERNEL); if (!tz) return ERR_PTR(-ENOMEM); @@ -1585,6 +1644,11 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, goto unregister; } +#ifdef CONFIG_THERMAL_EMULATION + result = device_create_file(&tz->device, &dev_attr_emul_temp); + if (result) + goto unregister; +#endif /* Create policy attribute */ result = device_create_file(&tz->device, &dev_attr_policy); if (result) @@ -1704,7 +1768,8 @@ static struct genl_multicast_group thermal_event_mcgrp = { .name = THERMAL_GENL_MCAST_GROUP_NAME, }; -int thermal_generate_netlink_event(u32 orig, enum events event) +int thermal_generate_netlink_event(struct thermal_zone_device *tz, + enum events event) { struct sk_buff *skb; struct nlattr *attr; @@ -1714,6 +1779,9 @@ int thermal_generate_netlink_event(u32 orig, enum events event) int result; static unsigned int thermal_event_seqnum; + if (!tz) + return -EINVAL; + /* allocate memory */ size = nla_total_size(sizeof(struct thermal_genl_event)) + nla_total_size(0); @@ -1748,7 +1816,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event) memset(thermal_event, 0, sizeof(struct thermal_genl_event)); - thermal_event->orig = orig; + thermal_event->orig = tz->id; thermal_event->event = event; /* send multicast genetlink message */ @@ -1760,7 +1828,7 @@ int thermal_generate_netlink_event(u32 orig, enum events event) result = genlmsg_multicast(skb, 0, thermal_event_mcgrp.id, GFP_ATOMIC); if (result) - pr_info("failed to send netlink event:%d\n", result); + dev_err(&tz->device, "Failed to send netlink event:%d", result); return result; } @@ -1800,6 +1868,7 @@ static int __init thermal_init(void) idr_destroy(&thermal_cdev_idr); mutex_destroy(&thermal_idr_lock); mutex_destroy(&thermal_list_lock); + return result; } result = genetlink_init(); return result; diff --git a/drivers/w1/masters/mxc_w1.c b/drivers/w1/masters/mxc_w1.c index 372c8c0d54a..950d354d50e 100644 --- a/drivers/w1/masters/mxc_w1.c +++ b/drivers/w1/masters/mxc_w1.c @@ -157,9 +157,16 @@ static int mxc_w1_remove(struct platform_device *pdev) return 0; } +static struct of_device_id mxc_w1_dt_ids[] = { + { .compatible = "fsl,imx21-owire" }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, mxc_w1_dt_ids); + static struct platform_driver mxc_w1_driver = { .driver = { - .name = "mxc_w1", + .name = "mxc_w1", + .of_match_table = mxc_w1_dt_ids, }, .probe = mxc_w1_probe, .remove = mxc_w1_remove, @@ -1428,6 +1428,8 @@ void bio_endio(struct bio *bio, int error) else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) error = -EIO; + trace_block_bio_complete(bio, error); + if (bio->bi_end_io) bio->bi_end_io(bio, error); } diff --git a/fs/block_dev.c b/fs/block_dev.c index 53f5fae5cfb..aea605c98ba 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1033,7 +1033,9 @@ void bd_set_size(struct block_device *bdev, loff_t size) { unsigned bsize = bdev_logical_block_size(bdev); - bdev->bd_inode->i_size = size; + mutex_lock(&bdev->bd_inode->i_mutex); + i_size_write(bdev->bd_inode, size); + mutex_unlock(&bdev->bd_inode->i_mutex); while (bsize < PAGE_CACHE_SIZE) { if (size & bsize) break; @@ -1118,7 +1120,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } } - if (!ret && !bdev->bd_openers) { + if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); bdi = blk_get_backing_dev_info(bdev); if (bdi == NULL) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1e59ed575cc..cf54bdfee33 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3689,20 +3689,6 @@ static int can_overcommit(struct btrfs_root *root, return 0; } -static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb, - unsigned long nr_pages, - enum wb_reason reason) -{ - if (!writeback_in_progress(sb->s_bdi) && - down_read_trylock(&sb->s_umount)) { - writeback_inodes_sb_nr(sb, nr_pages, reason); - up_read(&sb->s_umount); - return 1; - } - - return 0; -} - /* * shrink metadata reservation for delalloc */ @@ -3735,9 +3721,9 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, while (delalloc_bytes && loops < 3) { max_reclaim = min(delalloc_bytes, to_reclaim); nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; - writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb, - nr_pages, - WB_REASON_FS_FREE_SPACE); + try_to_writeback_inodes_sb_nr(root->fs_info->sb, + nr_pages, + WB_REASON_FS_FREE_SPACE); /* * We need to wait for the async pages to actually start before diff --git a/fs/buffer.c b/fs/buffer.c index 8e18281b407..b4dcb34c963 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -41,6 +41,7 @@ #include <linux/bitops.h> #include <linux/mpage.h> #include <linux/bit_spinlock.h> +#include <trace/events/block.h> static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); @@ -53,6 +54,13 @@ void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private) } EXPORT_SYMBOL(init_buffer); +inline void touch_buffer(struct buffer_head *bh) +{ + trace_block_touch_buffer(bh); + mark_page_accessed(bh->b_page); +} +EXPORT_SYMBOL(touch_buffer); + static int sleep_on_buffer(void *word) { io_schedule(); @@ -1113,6 +1121,8 @@ void mark_buffer_dirty(struct buffer_head *bh) { WARN_ON_ONCE(!buffer_uptodate(bh)); + trace_block_dirty_buffer(bh); + /* * Very *carefully* optimize the it-is-already-dirty case. * diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index d4f81edd9a5..a60ea977af6 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -236,16 +236,10 @@ static int ceph_readpage(struct file *filp, struct page *page) static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg) { struct inode *inode = req->r_inode; - struct ceph_osd_reply_head *replyhead; - int rc, bytes; + int rc = req->r_result; + int bytes = le32_to_cpu(msg->hdr.data_len); int i; - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - rc = le32_to_cpu(replyhead->result); - bytes = le32_to_cpu(msg->hdr.data_len); - dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes); /* unlock all pages, zeroing any data we didn't read */ @@ -315,7 +309,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, ci->i_truncate_seq, ci->i_truncate_size, - NULL, false, 1, 0); + NULL, false, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -492,8 +486,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) &ci->i_layout, snapc, page_off, len, ci->i_truncate_seq, ci->i_truncate_size, - &inode->i_mtime, - &page, 1, 0, 0, true); + &inode->i_mtime, &page, 1); if (err < 0) { dout("writepage setting page/mapping error %d %p\n", err, page); SetPageError(page); @@ -554,27 +547,18 @@ static void writepages_finish(struct ceph_osd_request *req, struct ceph_msg *msg) { struct inode *inode = req->r_inode; - struct ceph_osd_reply_head *replyhead; - struct ceph_osd_op *op; struct ceph_inode_info *ci = ceph_inode(inode); unsigned wrote; struct page *page; int i; struct ceph_snap_context *snapc = req->r_snapc; struct address_space *mapping = inode->i_mapping; - __s32 rc = -EIO; - u64 bytes = 0; + int rc = req->r_result; + u64 bytes = le64_to_cpu(req->r_request_ops[0].extent.length); struct ceph_fs_client *fsc = ceph_inode_to_client(inode); long writeback_stat; unsigned issued = ceph_caps_issued(ci); - /* parse reply */ - replyhead = msg->front.iov_base; - WARN_ON(le32_to_cpu(replyhead->num_ops) == 0); - op = (void *)(replyhead + 1); - rc = le32_to_cpu(replyhead->result); - bytes = le64_to_cpu(op->extent.length); - if (rc >= 0) { /* * Assume we wrote the pages we originally sent. The @@ -741,8 +725,6 @@ retry: struct page *page; int want; u64 offset, len; - struct ceph_osd_request_head *reqhead; - struct ceph_osd_op *op; long writeback_stat; next = 0; @@ -838,7 +820,7 @@ get_more_pages: snapc, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &inode->i_mtime, true, 1, 0); + &inode->i_mtime, true, 0); if (IS_ERR(req)) { rc = PTR_ERR(req); @@ -906,10 +888,8 @@ get_more_pages: /* revise final length, page count */ req->r_num_pages = locked_pages; - reqhead = req->r_request->front.iov_base; - op = (void *)(reqhead + 1); - op->extent.length = cpu_to_le64(len); - op->payload_len = cpu_to_le32(len); + req->r_request_ops[0].extent.length = cpu_to_le64(len); + req->r_request_ops[0].payload_len = cpu_to_le32(len); req->r_request->hdr.data_len = cpu_to_le32(len); rc = ceph_osdc_start_request(&fsc->client->osdc, req, true); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ae2be696eb5..78e2f575247 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -611,8 +611,16 @@ retry: if (flags & CEPH_CAP_FLAG_AUTH) ci->i_auth_cap = cap; - else if (ci->i_auth_cap == cap) + else if (ci->i_auth_cap == cap) { ci->i_auth_cap = NULL; + spin_lock(&mdsc->cap_dirty_lock); + if (!list_empty(&ci->i_dirty_item)) { + dout(" moving %p to cap_dirty_migrating\n", inode); + list_move(&ci->i_dirty_item, + &mdsc->cap_dirty_migrating); + } + spin_unlock(&mdsc->cap_dirty_lock); + } dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", inode, ceph_vinop(inode), cap, ceph_cap_string(issued), @@ -1460,7 +1468,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = &ci->vfs_inode; struct ceph_cap *cap; - int file_wanted, used; + int file_wanted, used, cap_used; int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */ int issued, implemented, want, retain, revoking, flushing = 0; int mds = -1; /* keep track of how far we've gone through i_caps list @@ -1563,9 +1571,14 @@ retry_locked: /* NOTE: no side-effects allowed, until we take s_mutex */ + cap_used = used; + if (ci->i_auth_cap && cap != ci->i_auth_cap) + cap_used &= ~ci->i_auth_cap->issued; + revoking = cap->implemented & ~cap->issued; - dout(" mds%d cap %p issued %s implemented %s revoking %s\n", + dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n", cap->mds, cap, ceph_cap_string(cap->issued), + ceph_cap_string(cap_used), ceph_cap_string(cap->implemented), ceph_cap_string(revoking)); @@ -1593,7 +1606,7 @@ retry_locked: } /* completed revocation? going down and there are no caps? */ - if (revoking && (revoking & used) == 0) { + if (revoking && (revoking & cap_used) == 0) { dout("completed revocation of %s\n", ceph_cap_string(cap->implemented & ~cap->issued)); goto ack; @@ -1670,8 +1683,8 @@ ack: sent++; /* __send_cap drops i_ceph_lock */ - delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want, - retain, flushing, NULL); + delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, cap_used, + want, retain, flushing, NULL); goto retry; /* retake i_ceph_lock and restart our cap scan. */ } @@ -2417,7 +2430,9 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, dout("mds wanted %s -> %s\n", ceph_cap_string(le32_to_cpu(grant->wanted)), ceph_cap_string(wanted)); - grant->wanted = cpu_to_le32(wanted); + /* imported cap may not have correct mds_wanted */ + if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) + check_caps = 1; } cap->seq = seq; @@ -2821,6 +2836,9 @@ void ceph_handle_caps(struct ceph_mds_session *session, dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq, (unsigned)seq); + if (op == CEPH_CAP_OP_IMPORT) + ceph_add_cap_releases(mdsc, session); + /* lookup ino */ inode = ceph_find_inode(sb, vino); ci = ceph_inode(inode); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 11b57c2c8f1..bf338d9b67e 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -243,6 +243,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, err = ceph_mdsc_do_request(mdsc, (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, req); + if (err) + goto out_err; + err = ceph_handle_snapdir(req, dentry, err); if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); @@ -263,6 +266,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, err = finish_no_open(file, dn); } else { dout("atomic_open finish_open on dn %p\n", dn); + if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) { + *opened |= FILE_CREATED; + } err = finish_open(file, dentry, ceph_open, opened); } @@ -535,7 +541,7 @@ more: ci->i_snap_realm->cached_context, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &mtime, false, 2, page_align); + &mtime, false, page_align); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index f5ed767806d..4a989345b37 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -185,7 +185,6 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) &ceph_sb_to_client(inode->i_sb)->client->osdc; u64 len = 1, olen; u64 tmp; - struct ceph_object_layout ol; struct ceph_pg pgid; int r; @@ -194,7 +193,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) return -EFAULT; down_read(&osdc->map_sem); - r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, + r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, &dl.object_no, &dl.object_offset, &olen); if (r < 0) @@ -209,10 +208,9 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx", ceph_ino(inode), dl.object_no); - ceph_calc_object_layout(&ol, dl.object_name, &ci->i_layout, + ceph_calc_object_layout(&pgid, dl.object_name, &ci->i_layout, osdc->osdmap); - pgid = ol.ol_pgid; dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); if (dl.osd >= 0) { struct ceph_entity_addr *a = diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 7a3dfe0a9a8..442880d099c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -233,6 +233,30 @@ bad: } /* + * parse create results + */ +static int parse_reply_info_create(void **p, void *end, + struct ceph_mds_reply_info_parsed *info, + int features) +{ + if (features & CEPH_FEATURE_REPLY_CREATE_INODE) { + if (*p == end) { + info->has_create_ino = false; + } else { + info->has_create_ino = true; + info->ino = ceph_decode_64(p); + } + } + + if (unlikely(*p != end)) + goto bad; + return 0; + +bad: + return -EIO; +} + +/* * parse extra results */ static int parse_reply_info_extra(void **p, void *end, @@ -241,8 +265,12 @@ static int parse_reply_info_extra(void **p, void *end, { if (info->head->op == CEPH_MDS_OP_GETFILELOCK) return parse_reply_info_filelock(p, end, info, features); - else + else if (info->head->op == CEPH_MDS_OP_READDIR) return parse_reply_info_dir(p, end, info, features); + else if (info->head->op == CEPH_MDS_OP_CREATE) + return parse_reply_info_create(p, end, info, features); + else + return -EIO; } /* @@ -2170,7 +2198,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) mutex_lock(&req->r_fill_mutex); err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); if (err == 0) { - if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK && + if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR || + req->r_op == CEPH_MDS_OP_LSSNAP) && rinfo->dir_nr) ceph_readdir_prepopulate(req, req->r_session); ceph_unreserve_caps(mdsc, &req->r_caps_reservation); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index ff4188bf619..c2a19fbbe51 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -74,6 +74,12 @@ struct ceph_mds_reply_info_parsed { struct ceph_mds_reply_info_in *dir_in; u8 dir_complete, dir_end; }; + + /* for create results */ + struct { + bool has_create_ino; + u64 ino; + }; }; /* encoded blob describing snapshot contexts for certain diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 73b7d44e8a3..0d3c9240c61 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -59,6 +59,10 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) return ERR_PTR(-ENOMEM); ceph_decode_16_safe(p, end, version, bad); + if (version > 3) { + pr_warning("got mdsmap version %d > 3, failing", version); + goto bad; + } ceph_decode_need(p, end, 8*sizeof(u32) + sizeof(u64), bad); m->m_epoch = ceph_decode_32(p); @@ -144,13 +148,13 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) /* pg_pools */ ceph_decode_32_safe(p, end, n, bad); m->m_num_data_pg_pools = n; - m->m_data_pg_pools = kcalloc(n, sizeof(u32), GFP_NOFS); + m->m_data_pg_pools = kcalloc(n, sizeof(u64), GFP_NOFS); if (!m->m_data_pg_pools) goto badmem; - ceph_decode_need(p, end, sizeof(u32)*(n+1), bad); + ceph_decode_need(p, end, sizeof(u64)*(n+1), bad); for (i = 0; i < n; i++) - m->m_data_pg_pools[i] = ceph_decode_32(p); - m->m_cas_pg_pool = ceph_decode_32(p); + m->m_data_pg_pools[i] = ceph_decode_64(p); + m->m_cas_pg_pool = ceph_decode_64(p); /* ok, we don't care about the rest. */ dout("mdsmap_decode success epoch %u\n", m->m_epoch); diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c index cd5097d7c80..89fa4a940a0 100644 --- a/fs/ceph/strings.c +++ b/fs/ceph/strings.c @@ -15,6 +15,7 @@ const char *ceph_mds_state_name(int s) case CEPH_MDS_STATE_BOOT: return "up:boot"; case CEPH_MDS_STATE_STANDBY: return "up:standby"; case CEPH_MDS_STATE_STANDBY_REPLAY: return "up:standby-replay"; + case CEPH_MDS_STATE_REPLAYONCE: return "up:oneshot-replay"; case CEPH_MDS_STATE_CREATING: return "up:creating"; case CEPH_MDS_STATE_STARTING: return "up:starting"; /* up and in */ @@ -50,10 +51,13 @@ const char *ceph_mds_op_name(int op) case CEPH_MDS_OP_LOOKUP: return "lookup"; case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; + case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; case CEPH_MDS_OP_GETATTR: return "getattr"; case CEPH_MDS_OP_SETXATTR: return "setxattr"; case CEPH_MDS_OP_SETATTR: return "setattr"; case CEPH_MDS_OP_RMXATTR: return "rmxattr"; + case CEPH_MDS_OP_SETLAYOUT: return "setlayou"; + case CEPH_MDS_OP_SETDIRLAYOUT: return "setdirlayout"; case CEPH_MDS_OP_READDIR: return "readdir"; case CEPH_MDS_OP_MKNOD: return "mknod"; case CEPH_MDS_OP_LINK: return "link"; diff --git a/fs/ceph/super.c b/fs/ceph/super.c index e86aa994812..9fe17c6c287 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -71,8 +71,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) /* * express utilization in terms of large blocks to avoid * overflow on 32-bit machines. + * + * NOTE: for the time being, we make bsize == frsize to humor + * not-yet-ancient versions of glibc that are broken. + * Someday, we will probably want to report a real block + * size... whatever that may mean for a network file system! */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; + buf->f_frsize = 1 << CEPH_BLOCK_SHIFT; buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); @@ -80,7 +86,6 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; buf->f_namelen = NAME_MAX; - buf->f_frsize = PAGE_CACHE_SIZE; /* leave fsid little-endian, regardless of host endianness */ fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f053bbd1886..c7b309723dc 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -21,7 +21,7 @@ /* large granularity for statfs utilization stats to facilitate * large volume sizes on 32-bit machines. */ -#define CEPH_BLOCK_SHIFT 20 /* 1 MB */ +#define CEPH_BLOCK_SHIFT 22 /* 4 MB */ #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ @@ -798,13 +798,7 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); /* file.c */ extern const struct file_operations ceph_file_fops; extern const struct address_space_operations ceph_aops; -extern int ceph_copy_to_page_vector(struct page **pages, - const char *data, - loff_t off, size_t len); -extern int ceph_copy_from_page_vector(struct page **pages, - char *data, - loff_t off, size_t len); -extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); + extern int ceph_open(struct inode *inode, struct file *file); extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct file *file, unsigned flags, umode_t mode, diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 2c2ae5be990..9b6b2b6dd16 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -29,9 +29,94 @@ struct ceph_vxattr { size_t name_size; /* strlen(name) + 1 (for '\0') */ size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val, size_t size); - bool readonly; + bool readonly, hidden; + bool (*exists_cb)(struct ceph_inode_info *ci); }; +/* layouts */ + +static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) +{ + size_t s; + char *p = (char *)&ci->i_layout; + + for (s = 0; s < sizeof(ci->i_layout); s++, p++) + if (*p) + return true; + return false; +} + +static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, + size_t size) +{ + int ret; + struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); + struct ceph_osd_client *osdc = &fsc->client->osdc; + s64 pool = ceph_file_layout_pg_pool(ci->i_layout); + const char *pool_name; + + dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); + down_read(&osdc->map_sem); + pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); + if (pool_name) + ret = snprintf(val, size, + "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s", + (unsigned long long)ceph_file_layout_su(ci->i_layout), + (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), + (unsigned long long)ceph_file_layout_object_size(ci->i_layout), + pool_name); + else + ret = snprintf(val, size, + "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", + (unsigned long long)ceph_file_layout_su(ci->i_layout), + (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), + (unsigned long long)ceph_file_layout_object_size(ci->i_layout), + (unsigned long long)pool); + + up_read(&osdc->map_sem); + return ret; +} + +static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%lld", + (unsigned long long)ceph_file_layout_su(ci->i_layout)); +} + +static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%lld", + (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout)); +} + +static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci, + char *val, size_t size) +{ + return snprintf(val, size, "%lld", + (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); +} + +static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci, + char *val, size_t size) +{ + int ret; + struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); + struct ceph_osd_client *osdc = &fsc->client->osdc; + s64 pool = ceph_file_layout_pg_pool(ci->i_layout); + const char *pool_name; + + down_read(&osdc->map_sem); + pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); + if (pool_name) + ret = snprintf(val, size, "%s", pool_name); + else + ret = snprintf(val, size, "%lld", (unsigned long long)pool); + up_read(&osdc->map_sem); + return ret; +} + /* directories */ static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val, @@ -83,17 +168,43 @@ static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val, (long)ci->i_rctime.tv_nsec); } -#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name -#define XATTR_NAME_CEPH(_type, _name) \ - { \ - .name = CEPH_XATTR_NAME(_type, _name), \ - .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ - .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ - .readonly = true, \ - } +#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name +#define CEPH_XATTR_NAME2(_type, _name, _name2) \ + XATTR_CEPH_PREFIX #_type "." #_name "." #_name2 + +#define XATTR_NAME_CEPH(_type, _name) \ + { \ + .name = CEPH_XATTR_NAME(_type, _name), \ + .name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \ + .getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \ + .readonly = true, \ + .hidden = false, \ + .exists_cb = NULL, \ + } +#define XATTR_LAYOUT_FIELD(_type, _name, _field) \ + { \ + .name = CEPH_XATTR_NAME2(_type, _name, _field), \ + .name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \ + .getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \ + .readonly = false, \ + .hidden = true, \ + .exists_cb = ceph_vxattrcb_layout_exists, \ + } static struct ceph_vxattr ceph_dir_vxattrs[] = { + { + .name = "ceph.dir.layout", + .name_size = sizeof("ceph.dir.layout"), + .getxattr_cb = ceph_vxattrcb_layout, + .readonly = false, + .hidden = false, + .exists_cb = ceph_vxattrcb_layout_exists, + }, + XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), + XATTR_LAYOUT_FIELD(dir, layout, stripe_count), + XATTR_LAYOUT_FIELD(dir, layout, object_size), + XATTR_LAYOUT_FIELD(dir, layout, pool), XATTR_NAME_CEPH(dir, entries), XATTR_NAME_CEPH(dir, files), XATTR_NAME_CEPH(dir, subdirs), @@ -102,35 +213,26 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { XATTR_NAME_CEPH(dir, rsubdirs), XATTR_NAME_CEPH(dir, rbytes), XATTR_NAME_CEPH(dir, rctime), - { 0 } /* Required table terminator */ + { .name = NULL, 0 } /* Required table terminator */ }; static size_t ceph_dir_vxattrs_name_size; /* total size of all names */ /* files */ -static size_t ceph_vxattrcb_file_layout(struct ceph_inode_info *ci, char *val, - size_t size) -{ - int ret; - - ret = snprintf(val, size, - "chunk_bytes=%lld\nstripe_count=%lld\nobject_size=%lld\n", - (unsigned long long)ceph_file_layout_su(ci->i_layout), - (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), - (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); - return ret; -} - static struct ceph_vxattr ceph_file_vxattrs[] = { - XATTR_NAME_CEPH(file, layout), - /* The following extended attribute name is deprecated */ { - .name = XATTR_CEPH_PREFIX "layout", - .name_size = sizeof (XATTR_CEPH_PREFIX "layout"), - .getxattr_cb = ceph_vxattrcb_file_layout, - .readonly = true, + .name = "ceph.file.layout", + .name_size = sizeof("ceph.file.layout"), + .getxattr_cb = ceph_vxattrcb_layout, + .readonly = false, + .hidden = false, + .exists_cb = ceph_vxattrcb_layout_exists, }, - { 0 } /* Required table terminator */ + XATTR_LAYOUT_FIELD(file, layout, stripe_unit), + XATTR_LAYOUT_FIELD(file, layout, stripe_count), + XATTR_LAYOUT_FIELD(file, layout, object_size), + XATTR_LAYOUT_FIELD(file, layout, pool), + { .name = NULL, 0 } /* Required table terminator */ }; static size_t ceph_file_vxattrs_name_size; /* total size of all names */ @@ -164,7 +266,8 @@ static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs) size_t size = 0; for (vxattr = vxattrs; vxattr->name; vxattr++) - size += vxattr->name_size; + if (!vxattr->hidden) + size += vxattr->name_size; return size; } @@ -572,13 +675,17 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, if (!ceph_is_valid_xattr(name)) return -ENODATA; - /* let's see if a virtual xattr was requested */ - vxattr = ceph_match_vxattr(inode, name); - spin_lock(&ci->i_ceph_lock); dout("getxattr %p ver=%lld index_ver=%lld\n", inode, ci->i_xattrs.version, ci->i_xattrs.index_version); + /* let's see if a virtual xattr was requested */ + vxattr = ceph_match_vxattr(inode, name); + if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { + err = vxattr->getxattr_cb(ci, value, size); + goto out; + } + if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { goto get_xattr; @@ -592,11 +699,6 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, spin_lock(&ci->i_ceph_lock); - if (vxattr && vxattr->readonly) { - err = vxattr->getxattr_cb(ci, value, size); - goto out; - } - err = __build_xattrs(inode); if (err < 0) goto out; @@ -604,11 +706,8 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, get_xattr: err = -ENODATA; /* == ENOATTR */ xattr = __get_xattr(ci, name); - if (!xattr) { - if (vxattr) - err = vxattr->getxattr_cb(ci, value, size); + if (!xattr) goto out; - } err = -ERANGE; if (size && size < xattr->val_len) @@ -664,23 +763,30 @@ list_xattr: vir_namelen = ceph_vxattrs_name_size(vxattrs); /* adding 1 byte per each variable due to the null termination */ - namelen = vir_namelen + ci->i_xattrs.names_size + ci->i_xattrs.count; + namelen = ci->i_xattrs.names_size + ci->i_xattrs.count; err = -ERANGE; - if (size && namelen > size) + if (size && vir_namelen + namelen > size) goto out; - err = namelen; + err = namelen + vir_namelen; if (size == 0) goto out; names = __copy_xattr_names(ci, names); /* virtual xattr names, too */ - if (vxattrs) + err = namelen; + if (vxattrs) { for (i = 0; vxattrs[i].name; i++) { - len = sprintf(names, "%s", vxattrs[i].name); - names += len + 1; + if (!vxattrs[i].hidden && + !(vxattrs[i].exists_cb && + !vxattrs[i].exists_cb(ci))) { + len = sprintf(names, "%s", vxattrs[i].name); + names += len + 1; + err += len + 1; + } } + } out: spin_unlock(&ci->i_ceph_lock); @@ -782,6 +888,10 @@ int ceph_setxattr(struct dentry *dentry, const char *name, if (vxattr && vxattr->readonly) return -EOPNOTSUPP; + /* pass any unhandled ceph.* xattrs through to the MDS */ + if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) + goto do_sync_unlocked; + /* preallocate memory for xattr name, value, index node */ err = -ENOMEM; newname = kmemdup(name, name_len + 1, GFP_NOFS); @@ -838,6 +948,7 @@ retry: do_sync: spin_unlock(&ci->i_ceph_lock); +do_sync_unlocked: err = ceph_sync_setxattr(dentry, name, value, size, flags); out: kfree(newname); @@ -892,6 +1003,10 @@ int ceph_removexattr(struct dentry *dentry, const char *name) if (vxattr && vxattr->readonly) return -EOPNOTSUPP; + /* pass any unhandled ceph.* xattrs through to the MDS */ + if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN)) + goto do_sync_unlocked; + err = -ENOMEM; spin_lock(&ci->i_ceph_lock); retry: @@ -931,6 +1046,7 @@ retry: return err; do_sync: spin_unlock(&ci->i_ceph_lock); +do_sync_unlocked: err = ceph_send_removexattr(dentry, name); out: return err; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9c4f4b1c97f..9ea0cde3fa9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2512,12 +2512,8 @@ static int ext4_nonda_switch(struct super_block *sb) /* * Start pushing delalloc when 1/2 of free blocks are dirty. */ - if (dirty_blocks && (free_blocks < 2 * dirty_blocks) && - !writeback_in_progress(sb->s_bdi) && - down_read_trylock(&sb->s_umount)) { - writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); - up_read(&sb->s_umount); - } + if (dirty_blocks && (free_blocks < 2 * dirty_blocks)) + try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE); if (2 * free_blocks < 3 * dirty_blocks || free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) { diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 310972b72a6..21f46fb3a10 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -318,8 +318,14 @@ static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work) static int write_inode(struct inode *inode, struct writeback_control *wbc) { - if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) - return inode->i_sb->s_op->write_inode(inode, wbc); + int ret; + + if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) { + trace_writeback_write_inode_start(inode, wbc); + ret = inode->i_sb->s_op->write_inode(inode, wbc); + trace_writeback_write_inode(inode, wbc); + return ret; + } return 0; } @@ -450,6 +456,8 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) WARN_ON(!(inode->i_state & I_SYNC)); + trace_writeback_single_inode_start(inode, wbc, nr_to_write); + ret = do_writepages(mapping, wbc); /* @@ -1150,8 +1158,12 @@ void __mark_inode_dirty(struct inode *inode, int flags) * dirty the inode itself */ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { + trace_writeback_dirty_inode_start(inode, flags); + if (sb->s_op->dirty_inode) sb->s_op->dirty_inode(inode, flags); + + trace_writeback_dirty_inode(inode, flags); } /* @@ -1332,47 +1344,43 @@ void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) EXPORT_SYMBOL(writeback_inodes_sb); /** - * writeback_inodes_sb_if_idle - start writeback if none underway + * try_to_writeback_inodes_sb_nr - try to start writeback if none underway * @sb: the superblock - * @reason: reason why some writeback work was initiated + * @nr: the number of pages to write + * @reason: the reason of writeback * - * Invoke writeback_inodes_sb if no writeback is currently underway. + * Invoke writeback_inodes_sb_nr if no writeback is currently underway. * Returns 1 if writeback was started, 0 if not. */ -int writeback_inodes_sb_if_idle(struct super_block *sb, enum wb_reason reason) +int try_to_writeback_inodes_sb_nr(struct super_block *sb, + unsigned long nr, + enum wb_reason reason) { - if (!writeback_in_progress(sb->s_bdi)) { - down_read(&sb->s_umount); - writeback_inodes_sb(sb, reason); - up_read(&sb->s_umount); + if (writeback_in_progress(sb->s_bdi)) return 1; - } else + + if (!down_read_trylock(&sb->s_umount)) return 0; + + writeback_inodes_sb_nr(sb, nr, reason); + up_read(&sb->s_umount); + return 1; } -EXPORT_SYMBOL(writeback_inodes_sb_if_idle); +EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr); /** - * writeback_inodes_sb_nr_if_idle - start writeback if none underway + * try_to_writeback_inodes_sb - try to start writeback if none underway * @sb: the superblock - * @nr: the number of pages to write * @reason: reason why some writeback work was initiated * - * Invoke writeback_inodes_sb if no writeback is currently underway. + * Implement by try_to_writeback_inodes_sb_nr() * Returns 1 if writeback was started, 0 if not. */ -int writeback_inodes_sb_nr_if_idle(struct super_block *sb, - unsigned long nr, - enum wb_reason reason) +int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) { - if (!writeback_in_progress(sb->s_bdi)) { - down_read(&sb->s_umount); - writeback_inodes_sb_nr(sb, nr, reason); - up_read(&sb->s_umount); - return 1; - } else - return 0; + return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason); } -EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle); +EXPORT_SYMBOL(try_to_writeback_inodes_sb); /** * sync_inodes_sb - sync sb inode pages diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index a2717408c47..0796c45d0d4 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -11,7 +11,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/nfs_fs.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> #include <linux/kthread.h> @@ -220,10 +220,19 @@ reclaimer(void *ptr) { struct nlm_host *host = (struct nlm_host *) ptr; struct nlm_wait *block; + struct nlm_rqst *req; struct file_lock *fl, *next; u32 nsmstate; struct net *net = host->net; + req = kmalloc(sizeof(*req), GFP_KERNEL); + if (!req) { + printk(KERN_ERR "lockd: reclaimer unable to alloc memory." + " Locks for %s won't be reclaimed!\n", + host->h_name); + return 0; + } + allow_signal(SIGKILL); down_write(&host->h_rwsem); @@ -253,7 +262,7 @@ restart: */ if (signalled()) continue; - if (nlmclnt_reclaim(host, fl) != 0) + if (nlmclnt_reclaim(host, fl, req) != 0) continue; list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted); if (host->h_nsmstate != nsmstate) { @@ -279,5 +288,6 @@ restart: /* Release host handle after use */ nlmclnt_release_host(host); lockd_down(net); + kfree(req); return 0; } diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 366277190b8..7e529c3c45c 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -618,17 +618,15 @@ out_unlock: * RECLAIM: Try to reclaim a lock */ int -nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl) +nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl, + struct nlm_rqst *req) { - struct nlm_rqst reqst, *req; int status; - req = &reqst; memset(req, 0, sizeof(*req)); locks_init_lock(&req->a_args.lock.fl); locks_init_lock(&req->a_res.lock.fl); req->a_host = host; - req->a_flags = 0; /* Set up the argument struct */ nlmclnt_setlockargs(req, fl); diff --git a/fs/lockd/host.c b/fs/lockd/host.c index abdd75d44dd..969d589c848 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -13,6 +13,7 @@ #include <linux/in.h> #include <linux/in6.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> #include <linux/mutex.h> diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index 3c2cfc68363..1812f026960 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -12,6 +12,7 @@ #include <linux/slab.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/xprtsock.h> #include <linux/sunrpc/svc.h> #include <linux/lockd/lockd.h> diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index d17bb62b06d..97e87415b14 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -13,7 +13,7 @@ #include <linux/slab.h> #include <linux/mutex.h> #include <linux/sunrpc/svc.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/nfsd/nfsfh.h> #include <linux/nfsd/export.h> #include <linux/lockd/lockd.h> diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c index 862a2f16db6..5f7b053720e 100644 --- a/fs/nfs/cache_lib.c +++ b/fs/nfs/cache_lib.c @@ -128,10 +128,13 @@ int nfs_cache_register_net(struct net *net, struct cache_detail *cd) struct super_block *pipefs_sb; int ret = 0; + sunrpc_init_cache_detail(cd); pipefs_sb = rpc_get_sb_net(net); if (pipefs_sb) { ret = nfs_cache_register_sb(pipefs_sb, cd); rpc_put_sb_net(net); + if (ret) + sunrpc_destroy_cache_detail(cd); } return ret; } @@ -151,14 +154,5 @@ void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd) nfs_cache_unregister_sb(pipefs_sb, cd); rpc_put_sb_net(net); } -} - -void nfs_cache_init(struct cache_detail *cd) -{ - sunrpc_init_cache_detail(cd); -} - -void nfs_cache_destroy(struct cache_detail *cd) -{ sunrpc_destroy_cache_detail(cd); } diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 317db95e37f..4116d2c3f52 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -23,8 +23,6 @@ extern struct nfs_cache_defer_req *nfs_cache_defer_req_alloc(void); extern void nfs_cache_defer_req_put(struct nfs_cache_defer_req *dreq); extern int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq); -extern void nfs_cache_init(struct cache_detail *cd); -extern void nfs_cache_destroy(struct cache_detail *cd); extern int nfs_cache_register_net(struct net *net, struct cache_detail *cd); extern void nfs_cache_unregister_net(struct net *net, struct cache_detail *cd); extern int nfs_cache_register_sb(struct super_block *sb, diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index ca4b11ec87a..94552709229 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/dns_resolver.h> #include "dns_resolve.h" @@ -42,6 +43,7 @@ EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); #include <linux/seq_file.h> #include <linux/inet.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/cache.h> #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/rpc_pipe_fs.h> @@ -142,7 +144,7 @@ static int nfs_dns_upcall(struct cache_detail *cd, ret = nfs_cache_upcall(cd, key->hostname); if (ret) - ret = sunrpc_cache_pipe_upcall(cd, ch, nfs_dns_request); + ret = sunrpc_cache_pipe_upcall(cd, ch); return ret; } @@ -351,60 +353,47 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, } EXPORT_SYMBOL_GPL(nfs_dns_resolve_name); +static struct cache_detail nfs_dns_resolve_template = { + .owner = THIS_MODULE, + .hash_size = NFS_DNS_HASHTBL_SIZE, + .name = "dns_resolve", + .cache_put = nfs_dns_ent_put, + .cache_upcall = nfs_dns_upcall, + .cache_request = nfs_dns_request, + .cache_parse = nfs_dns_parse, + .cache_show = nfs_dns_show, + .match = nfs_dns_match, + .init = nfs_dns_ent_init, + .update = nfs_dns_ent_update, + .alloc = nfs_dns_ent_alloc, +}; + + int nfs_dns_resolver_cache_init(struct net *net) { - int err = -ENOMEM; + int err; struct nfs_net *nn = net_generic(net, nfs_net_id); - struct cache_detail *cd; - struct cache_head **tbl; - cd = kzalloc(sizeof(struct cache_detail), GFP_KERNEL); - if (cd == NULL) - goto err_cd; - - tbl = kzalloc(NFS_DNS_HASHTBL_SIZE * sizeof(struct cache_head *), - GFP_KERNEL); - if (tbl == NULL) - goto err_tbl; - - cd->owner = THIS_MODULE, - cd->hash_size = NFS_DNS_HASHTBL_SIZE, - cd->hash_table = tbl, - cd->name = "dns_resolve", - cd->cache_put = nfs_dns_ent_put, - cd->cache_upcall = nfs_dns_upcall, - cd->cache_parse = nfs_dns_parse, - cd->cache_show = nfs_dns_show, - cd->match = nfs_dns_match, - cd->init = nfs_dns_ent_init, - cd->update = nfs_dns_ent_update, - cd->alloc = nfs_dns_ent_alloc, - - nfs_cache_init(cd); - err = nfs_cache_register_net(net, cd); + nn->nfs_dns_resolve = cache_create_net(&nfs_dns_resolve_template, net); + if (IS_ERR(nn->nfs_dns_resolve)) + return PTR_ERR(nn->nfs_dns_resolve); + + err = nfs_cache_register_net(net, nn->nfs_dns_resolve); if (err) goto err_reg; - nn->nfs_dns_resolve = cd; return 0; err_reg: - nfs_cache_destroy(cd); - kfree(cd->hash_table); -err_tbl: - kfree(cd); -err_cd: + cache_destroy_net(nn->nfs_dns_resolve, net); return err; } void nfs_dns_resolver_cache_destroy(struct net *net) { struct nfs_net *nn = net_generic(net, nfs_net_id); - struct cache_detail *cd = nn->nfs_dns_resolve; - nfs_cache_unregister_net(net, cd); - nfs_cache_destroy(cd); - kfree(cd->hash_table); - kfree(cd); + nfs_cache_unregister_net(net, nn->nfs_dns_resolve); + cache_destroy_net(nn->nfs_dns_resolve, net); } static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 47d10087239..ac4fc9a8fdb 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -6,6 +6,7 @@ #include <linux/nfs_fs.h> #include <linux/nfs_idmap.h> #include <linux/nfs_mount.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/auth.h> #include <linux/sunrpc/xprt.h> #include <linux/sunrpc/bc_xprt.h> diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index b720064bcd7..1fe284f01f8 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -31,6 +31,7 @@ #include <linux/nfs_fs.h> #include <linux/vmalloc.h> #include <linux/module.h> +#include <linux/sunrpc/addr.h> #include "internal.h" #include "nfs4session.h" diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 1e09eb78543..0dd766079e1 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -14,6 +14,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/vfs.h> #include <linux/inet.h> #include "internal.h" diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a9dc5fc2995..17b32b72245 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -31,6 +31,7 @@ #include <linux/errno.h> #include <linux/unistd.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/stats.h> #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/xprtsock.h> diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 93cc9d34c45..87fd1410b73 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -12,6 +12,10 @@ /* * Representation of a reply cache entry. + * + * Note that we use a sockaddr_in6 to hold the address instead of the more + * typical sockaddr_storage. This is for space reasons, since sockaddr_storage + * is much larger than a sockaddr_in6. */ struct svc_cacherep { struct hlist_node c_hash; @@ -20,11 +24,13 @@ struct svc_cacherep { unsigned char c_state, /* unused, inprog, done */ c_type, /* status, buffer */ c_secure : 1; /* req came from port < 1024 */ - struct sockaddr_in c_addr; + struct sockaddr_in6 c_addr; __be32 c_xid; u32 c_prot; u32 c_proc; u32 c_vers; + unsigned int c_len; + __wsum c_csum; unsigned long c_timestamp; union { struct kvec u_vec; @@ -46,8 +52,7 @@ enum { enum { RC_DROPIT, RC_REPLY, - RC_DOIT, - RC_INTR + RC_DOIT }; /* @@ -67,6 +72,12 @@ enum { */ #define RC_DELAY (HZ/5) +/* Cache entries expire after this time period */ +#define RC_EXPIRE (120 * HZ) + +/* Checksum this amount of the request */ +#define RC_CSUMLEN (256U) + int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *); diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5681c5906f0..5f38ea36e26 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -67,11 +67,6 @@ static void expkey_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, expkey_request); -} - static struct svc_expkey *svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, struct svc_expkey *old); static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_expkey *); @@ -245,7 +240,7 @@ static struct cache_detail svc_expkey_cache_template = { .hash_size = EXPKEY_HASHMAX, .name = "nfsd.fh", .cache_put = expkey_put, - .cache_upcall = expkey_upcall, + .cache_request = expkey_request, .cache_parse = expkey_parse, .cache_show = expkey_show, .match = expkey_match, @@ -315,6 +310,7 @@ static void svc_export_put(struct kref *ref) path_put(&exp->ex_path); auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); + kfree(exp->ex_uuid); kfree(exp); } @@ -337,11 +333,6 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, svc_export_request); -} - static struct svc_export *svc_export_update(struct svc_export *new, struct svc_export *old); static struct svc_export *svc_export_lookup(struct svc_export *); @@ -674,6 +665,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem) new->ex_fslocs.locations = NULL; new->ex_fslocs.locations_count = 0; new->ex_fslocs.migrated = 0; + new->ex_uuid = NULL; new->cd = item->cd; } @@ -715,7 +707,7 @@ static struct cache_detail svc_export_cache_template = { .hash_size = EXPORT_HASHMAX, .name = "nfsd.export", .cache_put = svc_export_put, - .cache_upcall = svc_export_upcall, + .cache_request = svc_export_request, .cache_parse = svc_export_parse, .cache_show = svc_export_show, .match = svc_export_match, diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c index 497584c7036..d620e7f8142 100644 --- a/fs/nfsd/fault_inject.c +++ b/fs/nfsd/fault_inject.c @@ -9,7 +9,7 @@ #include <linux/debugfs.h> #include <linux/module.h> #include <linux/nsproxy.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <asm/uaccess.h> #include "state.h" diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 0ce12346df9..4832fd819f8 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -140,12 +140,6 @@ idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int -idtoname_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, idtoname_request); -} - -static int idtoname_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -192,7 +186,7 @@ static struct cache_detail idtoname_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.idtoname", .cache_put = ent_put, - .cache_upcall = idtoname_upcall, + .cache_request = idtoname_request, .cache_parse = idtoname_parse, .cache_show = idtoname_show, .warn_no_listener = warn_no_idmapd, @@ -321,12 +315,6 @@ nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, } static int -nametoid_upcall(struct cache_detail *cd, struct cache_head *ch) -{ - return sunrpc_cache_pipe_upcall(cd, ch, nametoid_request); -} - -static int nametoid_match(struct cache_head *ca, struct cache_head *cb) { struct ent *a = container_of(ca, struct ent, h); @@ -365,7 +353,7 @@ static struct cache_detail nametoid_cache_template = { .hash_size = ENT_HASHMAX, .name = "nfs4.nametoid", .cache_put = ent_put, - .cache_upcall = nametoid_upcall, + .cache_request = nametoid_request, .cache_parse = nametoid_parse, .cache_show = nametoid_show, .warn_no_listener = warn_no_idmapd, diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9d1c5dba2bb..ae73175e6e6 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -993,14 +993,15 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (!buf) return nfserr_jukebox; + p = buf; status = nfsd4_encode_fattr(&cstate->current_fh, cstate->current_fh.fh_export, - cstate->current_fh.fh_dentry, buf, - &count, verify->ve_bmval, + cstate->current_fh.fh_dentry, &p, + count, verify->ve_bmval, rqstp, 0); /* this means that nfsd4_encode_fattr() ran out of space */ - if (status == nfserr_resource && count == 0) + if (status == nfserr_resource) status = nfserr_not_same; if (status) goto out_kfree; diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4914af4a817..899ca26dd19 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -1185,6 +1185,12 @@ bin_to_hex_dup(const unsigned char *src, int srclen) static int nfsd4_umh_cltrack_init(struct net __attribute__((unused)) *net) { + /* XXX: The usermode helper s not working in container yet. */ + if (net != &init_net) { + WARN(1, KERN_ERR "NFSD: attempt to initialize umh client " + "tracking in a container!\n"); + return -EINVAL; + } return nfsd4_umh_cltrack_upcall("init", NULL, NULL); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9e7103b6e0a..16d39c6c4fb 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -40,7 +40,7 @@ #include <linux/pagemap.h> #include <linux/ratelimit.h> #include <linux/sunrpc/svcauth_gss.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include "xdr4.h" #include "vfs.h" #include "current_stateid.h" @@ -261,33 +261,46 @@ static inline int get_new_stid(struct nfs4_stid *stid) return new_stid; } -static void init_stid(struct nfs4_stid *stid, struct nfs4_client *cl, unsigned char type) +static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct +kmem_cache *slab) { - stateid_t *s = &stid->sc_stateid; + struct idr *stateids = &cl->cl_stateids; + static int min_stateid = 0; + struct nfs4_stid *stid; int new_id; - stid->sc_type = type; + stid = kmem_cache_alloc(slab, GFP_KERNEL); + if (!stid) + return NULL; + + if (!idr_pre_get(stateids, GFP_KERNEL)) + goto out_free; + if (idr_get_new_above(stateids, stid, min_stateid, &new_id)) + goto out_free; stid->sc_client = cl; - s->si_opaque.so_clid = cl->cl_clientid; - new_id = get_new_stid(stid); - s->si_opaque.so_id = (u32)new_id; + stid->sc_type = 0; + stid->sc_stateid.si_opaque.so_id = new_id; + stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid; /* Will be incremented before return to client: */ - s->si_generation = 0; -} - -static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) -{ - struct idr *stateids = &cl->cl_stateids; + stid->sc_stateid.si_generation = 0; - if (!idr_pre_get(stateids, GFP_KERNEL)) - return NULL; /* - * Note: if we fail here (or any time between now and the time - * we actually get the new idr), we won't need to undo the idr - * preallocation, since the idr code caps the number of - * preallocated entries. + * It shouldn't be a problem to reuse an opaque stateid value. + * I don't think it is for 4.1. But with 4.0 I worry that, for + * example, a stray write retransmission could be accepted by + * the server when it should have been rejected. Therefore, + * adopt a trick from the sctp code to attempt to maximize the + * amount of time until an id is reused, by ensuring they always + * "increase" (mod INT_MAX): */ - return kmem_cache_alloc(slab, GFP_KERNEL); + + min_stateid = new_id+1; + if (min_stateid == INT_MAX) + min_stateid = 0; + return stid; +out_free: + kfree(stid); + return NULL; } static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) @@ -316,7 +329,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab)); if (dp == NULL) return dp; - init_stid(&dp->dl_stid, clp, NFS4_DELEG_STID); + dp->dl_stid.sc_type = NFS4_DELEG_STID; /* * delegation seqid's are never incremented. The 4.1 special * meaning of seqid 0 isn't meaningful, really, but let's avoid @@ -337,13 +350,21 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv return dp; } +static void free_stid(struct nfs4_stid *s, struct kmem_cache *slab) +{ + struct idr *stateids = &s->sc_client->cl_stateids; + + idr_remove(stateids, s->sc_stateid.si_opaque.so_id); + kmem_cache_free(slab, s); +} + void nfs4_put_delegation(struct nfs4_delegation *dp) { if (atomic_dec_and_test(&dp->dl_count)) { dprintk("NFSD: freeing dp %p\n",dp); put_nfs4_file(dp->dl_file); - kmem_cache_free(deleg_slab, dp); + free_stid(&dp->dl_stid, deleg_slab); num_delegations--; } } @@ -360,9 +381,7 @@ static void nfs4_put_deleg_lease(struct nfs4_file *fp) static void unhash_stid(struct nfs4_stid *s) { - struct idr *stateids = &s->sc_client->cl_stateids; - - idr_remove(stateids, s->sc_stateid.si_opaque.so_id); + s->sc_type = 0; } /* Called under the state lock. */ @@ -519,7 +538,7 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp) static void free_generic_stateid(struct nfs4_ol_stateid *stp) { - kmem_cache_free(stateid_slab, stp); + free_stid(&stp->st_stid, stateid_slab); } static void release_lock_stateid(struct nfs4_ol_stateid *stp) @@ -905,7 +924,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan, new = __alloc_session(slotsize, numslots); if (!new) { - nfsd4_put_drc_mem(slotsize, fchan->maxreqs); + nfsd4_put_drc_mem(slotsize, numslots); return NULL; } init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize, nn); @@ -1048,7 +1067,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) static inline void free_client(struct nfs4_client *clp) { - struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id); lockdep_assert_held(&nn->client_lock); while (!list_empty(&clp->cl_sessions)) { @@ -1060,6 +1079,7 @@ free_client(struct nfs4_client *clp) } free_svc_cred(&clp->cl_cred); kfree(clp->cl_name.data); + idr_destroy(&clp->cl_stateids); kfree(clp); } @@ -1258,7 +1278,12 @@ static void gen_confirm(struct nfs4_client *clp) static struct nfs4_stid *find_stateid(struct nfs4_client *cl, stateid_t *t) { - return idr_find(&cl->cl_stateids, t->si_opaque.so_id); + struct nfs4_stid *ret; + + ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id); + if (!ret || !ret->sc_type) + return NULL; + return ret; } static struct nfs4_stid *find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask) @@ -1844,11 +1869,12 @@ nfsd4_create_session(struct svc_rqst *rqstp, /* cache solo and embedded create sessions under the state lock */ nfsd4_cache_create_session(cr_ses, cs_slot, status); -out: nfs4_unlock_state(); +out: dprintk("%s returns %d\n", __func__, ntohl(status)); return status; out_free_conn: + nfs4_unlock_state(); free_conn(conn); out_free_session: __free_session(new); @@ -2443,9 +2469,8 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, str static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) { struct nfs4_openowner *oo = open->op_openowner; - struct nfs4_client *clp = oo->oo_owner.so_client; - init_stid(&stp->st_stid, clp, NFS4_OPEN_STID); + stp->st_stid.sc_type = NFS4_OPEN_STID; INIT_LIST_HEAD(&stp->st_lockowners); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); list_add(&stp->st_perfile, &fp->fi_stateids); @@ -4031,7 +4056,7 @@ alloc_init_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp, struct stp = nfs4_alloc_stateid(clp); if (stp == NULL) return NULL; - init_stid(&stp->st_stid, clp, NFS4_LOCK_STID); + stp->st_stid.sc_type = NFS4_LOCK_STID; list_add(&stp->st_perfile, &fp->fi_stateids); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); stp->st_stateowner = &lo->lo_owner; @@ -4913,16 +4938,6 @@ nfs4_state_start_net(struct net *net) struct nfsd_net *nn = net_generic(net, nfsd_net_id); int ret; - /* - * FIXME: For now, we hang most of the pernet global stuff off of - * init_net until nfsd is fully containerized. Eventually, we'll - * need to pass a net pointer into this function, take a reference - * to that instead and then do most of the rest of this on a per-net - * basis. - */ - if (net != &init_net) - return -EINVAL; - ret = nfs4_state_create_net(net); if (ret) return ret; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 8ca6d17f6cf..01168865dd3 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2024,12 +2024,11 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) * Note: @fhp can be NULL; in this case, we might have to compose the filehandle * ourselves. * - * @countp is the buffer size in _words_; upon successful return this becomes - * replaced with the number of words written. + * countp is the buffer size in _words_ */ __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, + struct dentry *dentry, __be32 **buffer, int count, u32 *bmval, struct svc_rqst *rqstp, int ignore_crossmnt) { u32 bmval0 = bmval[0]; @@ -2038,12 +2037,12 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, struct kstat stat; struct svc_fh tempfh; struct kstatfs statfs; - int buflen = *countp << 2; + int buflen = count << 2; __be32 *attrlenp; u32 dummy; u64 dummy64; u32 rdattr_err = 0; - __be32 *p = buffer; + __be32 *p = *buffer; __be32 status; int err; int aclsupport = 0; @@ -2447,7 +2446,7 @@ out_acl: } *attrlenp = htonl((char *)p - (char *)attrlenp - 4); - *countp = p - buffer; + *buffer = p; status = nfs_ok; out: @@ -2459,7 +2458,6 @@ out_nfserr: status = nfserrno(err); goto out; out_resource: - *countp = 0; status = nfserr_resource; goto out; out_serverfault: @@ -2478,7 +2476,7 @@ static inline int attributes_need_mount(u32 *bmval) static __be32 nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, - const char *name, int namlen, __be32 *p, int *buflen) + const char *name, int namlen, __be32 **p, int buflen) { struct svc_export *exp = cd->rd_fhp->fh_export; struct dentry *dentry; @@ -2584,10 +2582,9 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */ p = xdr_encode_array(p, name, namlen); /* name length & name */ - nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen); + nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen); switch (nfserr) { case nfs_ok: - p += buflen; break; case nfserr_resource: nfserr = nfserr_toosmall; @@ -2714,10 +2711,8 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, - resp->p, &buflen, getattr->ga_bmval, + &resp->p, buflen, getattr->ga_bmval, resp->rqstp, 0); - if (!nfserr) - resp->p += buflen; return nfserr; } diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index da3dbd0f897..62c1ee128ae 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -9,22 +9,22 @@ */ #include <linux/slab.h> +#include <linux/sunrpc/addr.h> +#include <linux/highmem.h> +#include <net/checksum.h> #include "nfsd.h" #include "cache.h" -/* Size of reply cache. Common values are: - * 4.3BSD: 128 - * 4.4BSD: 256 - * Solaris2: 1024 - * DEC Unix: 512-4096 - */ -#define CACHESIZE 1024 +#define NFSDDBG_FACILITY NFSDDBG_REPCACHE + #define HASHSIZE 64 static struct hlist_head * cache_hash; static struct list_head lru_head; -static int cache_disabled = 1; +static struct kmem_cache *drc_slab; +static unsigned int num_drc_entries; +static unsigned int max_drc_entries; /* * Calculate the hash index from an XID. @@ -37,6 +37,14 @@ static inline u32 request_hash(u32 xid) } static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); +static void cache_cleaner_func(struct work_struct *unused); +static int nfsd_reply_cache_shrink(struct shrinker *shrink, + struct shrink_control *sc); + +struct shrinker nfsd_reply_cache_shrinker = { + .shrink = nfsd_reply_cache_shrink, + .seeks = 1, +}; /* * locking for the reply cache: @@ -44,30 +52,86 @@ static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec); * Otherwise, it when accessing _prev or _next, the lock must be held. */ static DEFINE_SPINLOCK(cache_lock); +static DECLARE_DELAYED_WORK(cache_cleaner, cache_cleaner_func); -int nfsd_reply_cache_init(void) +/* + * Put a cap on the size of the DRC based on the amount of available + * low memory in the machine. + * + * 64MB: 8192 + * 128MB: 11585 + * 256MB: 16384 + * 512MB: 23170 + * 1GB: 32768 + * 2GB: 46340 + * 4GB: 65536 + * 8GB: 92681 + * 16GB: 131072 + * + * ...with a hard cap of 256k entries. In the worst case, each entry will be + * ~1k, so the above numbers should give a rough max of the amount of memory + * used in k. + */ +static unsigned int +nfsd_cache_size_limit(void) +{ + unsigned int limit; + unsigned long low_pages = totalram_pages - totalhigh_pages; + + limit = (16 * int_sqrt(low_pages)) << (PAGE_SHIFT-10); + return min_t(unsigned int, limit, 256*1024); +} + +static struct svc_cacherep * +nfsd_reply_cache_alloc(void) { struct svc_cacherep *rp; - int i; - INIT_LIST_HEAD(&lru_head); - i = CACHESIZE; - while (i) { - rp = kmalloc(sizeof(*rp), GFP_KERNEL); - if (!rp) - goto out_nomem; - list_add(&rp->c_lru, &lru_head); + rp = kmem_cache_alloc(drc_slab, GFP_KERNEL); + if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; + INIT_LIST_HEAD(&rp->c_lru); INIT_HLIST_NODE(&rp->c_hash); - i--; } + return rp; +} - cache_hash = kcalloc (HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); +static void +nfsd_reply_cache_free_locked(struct svc_cacherep *rp) +{ + if (rp->c_type == RC_REPLBUFF) + kfree(rp->c_replvec.iov_base); + hlist_del(&rp->c_hash); + list_del(&rp->c_lru); + --num_drc_entries; + kmem_cache_free(drc_slab, rp); +} + +static void +nfsd_reply_cache_free(struct svc_cacherep *rp) +{ + spin_lock(&cache_lock); + nfsd_reply_cache_free_locked(rp); + spin_unlock(&cache_lock); +} + +int nfsd_reply_cache_init(void) +{ + register_shrinker(&nfsd_reply_cache_shrinker); + drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep), + 0, 0, NULL); + if (!drc_slab) + goto out_nomem; + + cache_hash = kcalloc(HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL); if (!cache_hash) goto out_nomem; - cache_disabled = 0; + INIT_LIST_HEAD(&lru_head); + max_drc_entries = nfsd_cache_size_limit(); + num_drc_entries = 0; + return 0; out_nomem: printk(KERN_ERR "nfsd: failed to allocate reply cache\n"); @@ -79,27 +143,33 @@ void nfsd_reply_cache_shutdown(void) { struct svc_cacherep *rp; + unregister_shrinker(&nfsd_reply_cache_shrinker); + cancel_delayed_work_sync(&cache_cleaner); + while (!list_empty(&lru_head)) { rp = list_entry(lru_head.next, struct svc_cacherep, c_lru); - if (rp->c_state == RC_DONE && rp->c_type == RC_REPLBUFF) - kfree(rp->c_replvec.iov_base); - list_del(&rp->c_lru); - kfree(rp); + nfsd_reply_cache_free_locked(rp); } - cache_disabled = 1; - kfree (cache_hash); cache_hash = NULL; + + if (drc_slab) { + kmem_cache_destroy(drc_slab); + drc_slab = NULL; + } } /* - * Move cache entry to end of LRU list + * Move cache entry to end of LRU list, and queue the cleaner to run if it's + * not already scheduled. */ static void lru_put_end(struct svc_cacherep *rp) { + rp->c_timestamp = jiffies; list_move_tail(&rp->c_lru, &lru_head); + schedule_delayed_work(&cache_cleaner, RC_EXPIRE); } /* @@ -112,82 +182,214 @@ hash_refile(struct svc_cacherep *rp) hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid)); } +static inline bool +nfsd_cache_entry_expired(struct svc_cacherep *rp) +{ + return rp->c_state != RC_INPROG && + time_after(jiffies, rp->c_timestamp + RC_EXPIRE); +} + +/* + * Walk the LRU list and prune off entries that are older than RC_EXPIRE. + * Also prune the oldest ones when the total exceeds the max number of entries. + */ +static void +prune_cache_entries(void) +{ + struct svc_cacherep *rp, *tmp; + + list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) { + if (!nfsd_cache_entry_expired(rp) && + num_drc_entries <= max_drc_entries) + break; + nfsd_reply_cache_free_locked(rp); + } + + /* + * Conditionally rearm the job. If we cleaned out the list, then + * cancel any pending run (since there won't be any work to do). + * Otherwise, we rearm the job or modify the existing one to run in + * RC_EXPIRE since we just ran the pruner. + */ + if (list_empty(&lru_head)) + cancel_delayed_work(&cache_cleaner); + else + mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE); +} + +static void +cache_cleaner_func(struct work_struct *unused) +{ + spin_lock(&cache_lock); + prune_cache_entries(); + spin_unlock(&cache_lock); +} + +static int +nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc) +{ + unsigned int num; + + spin_lock(&cache_lock); + if (sc->nr_to_scan) + prune_cache_entries(); + num = num_drc_entries; + spin_unlock(&cache_lock); + + return num; +} + +/* + * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes + */ +static __wsum +nfsd_cache_csum(struct svc_rqst *rqstp) +{ + int idx; + unsigned int base; + __wsum csum; + struct xdr_buf *buf = &rqstp->rq_arg; + const unsigned char *p = buf->head[0].iov_base; + size_t csum_len = min_t(size_t, buf->head[0].iov_len + buf->page_len, + RC_CSUMLEN); + size_t len = min(buf->head[0].iov_len, csum_len); + + /* rq_arg.head first */ + csum = csum_partial(p, len, 0); + csum_len -= len; + + /* Continue into page array */ + idx = buf->page_base / PAGE_SIZE; + base = buf->page_base & ~PAGE_MASK; + while (csum_len) { + p = page_address(buf->pages[idx]) + base; + len = min_t(size_t, PAGE_SIZE - base, csum_len); + csum = csum_partial(p, len, csum); + csum_len -= len; + base = 0; + ++idx; + } + return csum; +} + +/* + * Search the request hash for an entry that matches the given rqstp. + * Must be called with cache_lock held. Returns the found entry or + * NULL on failure. + */ +static struct svc_cacherep * +nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum) +{ + struct svc_cacherep *rp; + struct hlist_head *rh; + __be32 xid = rqstp->rq_xid; + u32 proto = rqstp->rq_prot, + vers = rqstp->rq_vers, + proc = rqstp->rq_proc; + + rh = &cache_hash[request_hash(xid)]; + hlist_for_each_entry(rp, rh, c_hash) { + if (xid == rp->c_xid && proc == rp->c_proc && + proto == rp->c_prot && vers == rp->c_vers && + rqstp->rq_arg.len == rp->c_len && csum == rp->c_csum && + rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) && + rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr)) + return rp; + } + return NULL; +} + /* * Try to find an entry matching the current call in the cache. When none - * is found, we grab the oldest unlocked entry off the LRU list. - * Note that no operation within the loop may sleep. + * is found, we try to grab the oldest expired entry off the LRU list. If + * a suitable one isn't there, then drop the cache_lock and allocate a + * new one, then search again in case one got inserted while this thread + * didn't hold the lock. */ int nfsd_cache_lookup(struct svc_rqst *rqstp) { - struct hlist_head *rh; - struct svc_cacherep *rp; + struct svc_cacherep *rp, *found; __be32 xid = rqstp->rq_xid; u32 proto = rqstp->rq_prot, vers = rqstp->rq_vers, proc = rqstp->rq_proc; + __wsum csum; unsigned long age; int type = rqstp->rq_cachetype; int rtn; rqstp->rq_cacherep = NULL; - if (cache_disabled || type == RC_NOCACHE) { + if (type == RC_NOCACHE) { nfsdstats.rcnocache++; return RC_DOIT; } + csum = nfsd_cache_csum(rqstp); + spin_lock(&cache_lock); rtn = RC_DOIT; - rh = &cache_hash[request_hash(xid)]; - hlist_for_each_entry(rp, rh, c_hash) { - if (rp->c_state != RC_UNUSED && - xid == rp->c_xid && proc == rp->c_proc && - proto == rp->c_prot && vers == rp->c_vers && - time_before(jiffies, rp->c_timestamp + 120*HZ) && - memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) { - nfsdstats.rchits++; - goto found_entry; + rp = nfsd_cache_search(rqstp, csum); + if (rp) + goto found_entry; + + /* Try to use the first entry on the LRU */ + if (!list_empty(&lru_head)) { + rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); + if (nfsd_cache_entry_expired(rp) || + num_drc_entries >= max_drc_entries) { + lru_put_end(rp); + prune_cache_entries(); + goto setup_entry; } } - nfsdstats.rcmisses++; - /* This loop shouldn't take more than a few iterations normally */ - { - int safe = 0; - list_for_each_entry(rp, &lru_head, c_lru) { - if (rp->c_state != RC_INPROG) - break; - if (safe++ > CACHESIZE) { - printk("nfsd: loop in repcache LRU list\n"); - cache_disabled = 1; - goto out; - } + /* Drop the lock and allocate a new entry */ + spin_unlock(&cache_lock); + rp = nfsd_reply_cache_alloc(); + if (!rp) { + dprintk("nfsd: unable to allocate DRC entry!\n"); + return RC_DOIT; } + spin_lock(&cache_lock); + ++num_drc_entries; + + /* + * Must search again just in case someone inserted one + * after we dropped the lock above. + */ + found = nfsd_cache_search(rqstp, csum); + if (found) { + nfsd_reply_cache_free_locked(rp); + rp = found; + goto found_entry; } - /* All entries on the LRU are in-progress. This should not happen */ - if (&rp->c_lru == &lru_head) { - static int complaints; - - printk(KERN_WARNING "nfsd: all repcache entries locked!\n"); - if (++complaints > 5) { - printk(KERN_WARNING "nfsd: disabling repcache.\n"); - cache_disabled = 1; - } - goto out; - } + /* + * We're keeping the one we just allocated. Are we now over the + * limit? Prune one off the tip of the LRU in trade for the one we + * just allocated if so. + */ + if (num_drc_entries >= max_drc_entries) + nfsd_reply_cache_free_locked(list_first_entry(&lru_head, + struct svc_cacherep, c_lru)); +setup_entry: + nfsdstats.rcmisses++; rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; rp->c_xid = xid; rp->c_proc = proc; - memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr)); + rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp)); + rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); rp->c_prot = proto; rp->c_vers = vers; - rp->c_timestamp = jiffies; + rp->c_len = rqstp->rq_arg.len; + rp->c_csum = csum; hash_refile(rp); + lru_put_end(rp); /* release any buffer */ if (rp->c_type == RC_REPLBUFF) { @@ -200,9 +402,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) return rtn; found_entry: + nfsdstats.rchits++; /* We found a matching entry which is either in progress or done. */ age = jiffies - rp->c_timestamp; - rp->c_timestamp = jiffies; lru_put_end(rp); rtn = RC_DROPIT; @@ -231,7 +433,7 @@ found_entry: break; default: printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free_locked(rp); } goto out; @@ -256,11 +458,11 @@ found_entry: void nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { - struct svc_cacherep *rp; + struct svc_cacherep *rp = rqstp->rq_cacherep; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; int len; - if (!(rp = rqstp->rq_cacherep) || cache_disabled) + if (!rp) return; len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); @@ -268,7 +470,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) /* Don't cache excessive amounts of data and XDR failures */ if (!statp || len > (256 >> 2)) { - rp->c_state = RC_UNUSED; + nfsd_reply_cache_free(rp); return; } @@ -282,21 +484,21 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) cachv = &rp->c_replvec; cachv->iov_base = kmalloc(len << 2, GFP_KERNEL); if (!cachv->iov_base) { - spin_lock(&cache_lock); - rp->c_state = RC_UNUSED; - spin_unlock(&cache_lock); + nfsd_reply_cache_free(rp); return; } cachv->iov_len = len << 2; memcpy(cachv->iov_base, statp, len << 2); break; + case RC_NOCACHE: + nfsd_reply_cache_free(rp); + return; } spin_lock(&cache_lock); lru_put_end(rp); rp->c_secure = rqstp->rq_secure; rp->c_type = cachetype; rp->c_state = RC_DONE; - rp->c_timestamp = jiffies; spin_unlock(&cache_lock); return; } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 2db7021b01a..13a21c8fca4 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -10,7 +10,7 @@ #include <linux/sunrpc/svcsock.h> #include <linux/lockd/lockd.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/gss_api.h> #include <linux/sunrpc/gss_krb5_enctypes.h> #include <linux/sunrpc/rpc_pipe_fs.h> @@ -125,11 +125,11 @@ static const struct file_operations transaction_ops = { .llseek = default_llseek, }; -static int exports_open(struct inode *inode, struct file *file) +static int exports_net_open(struct net *net, struct file *file) { int err; struct seq_file *seq; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); err = seq_open(file, &nfs_exports_op); if (err) @@ -140,8 +140,26 @@ static int exports_open(struct inode *inode, struct file *file) return 0; } -static const struct file_operations exports_operations = { - .open = exports_open, +static int exports_proc_open(struct inode *inode, struct file *file) +{ + return exports_net_open(current->nsproxy->net_ns, file); +} + +static const struct file_operations exports_proc_operations = { + .open = exports_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; + +static int exports_nfsd_open(struct inode *inode, struct file *file) +{ + return exports_net_open(inode->i_sb->s_fs_info, file); +} + +static const struct file_operations exports_nfsd_operations = { + .open = exports_nfsd_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, @@ -220,6 +238,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) struct sockaddr *sap = (struct sockaddr *)&address; size_t salen = sizeof(address); char *fo_path; + struct net *net = file->f_dentry->d_sb->s_fs_info; /* sanity check */ if (size == 0) @@ -232,7 +251,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - if (rpc_pton(&init_net, fo_path, size, sap, salen) == 0) + if (rpc_pton(net, fo_path, size, sap, salen) == 0) return -EINVAL; return nlmsvc_unlock_all_by_ip(sap); @@ -317,6 +336,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) int len; struct auth_domain *dom; struct knfsd_fh fh; + struct net *net = file->f_dentry->d_sb->s_fs_info; if (size == 0) return -EINVAL; @@ -352,7 +372,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size) if (!dom) return -ENOMEM; - len = exp_rootfh(&init_net, dom, path, &fh, maxsize); + len = exp_rootfh(net, dom, path, &fh, maxsize); auth_domain_put(dom); if (len) return len; @@ -396,7 +416,7 @@ static ssize_t write_threads(struct file *file, char *buf, size_t size) { char *mesg = buf; int rv; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; if (size > 0) { int newthreads; @@ -447,7 +467,7 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size) int len; int npools; int *nthreads; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); npools = nfsd_nrpools(net); @@ -510,7 +530,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) unsigned minor; ssize_t tlen = 0; char *sep; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size>0) { @@ -534,7 +554,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) else num = simple_strtol(vers, &minorp, 0); if (*minorp == '.') { - if (num < 4) + if (num != 4) return -EINVAL; minor = simple_strtoul(minorp+1, NULL, 0); if (minor == 0) @@ -792,7 +812,7 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size, static ssize_t write_ports(struct file *file, char *buf, size_t size) { ssize_t rv; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; mutex_lock(&nfsd_mutex); rv = __write_ports(file, buf, size, net); @@ -827,7 +847,7 @@ int nfsd_max_blksize; static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) { char *mesg = buf; - struct net *net = &init_net; + struct net *net = file->f_dentry->d_sb->s_fs_info; struct nfsd_net *nn = net_generic(net, nfsd_net_id); if (size > 0) { @@ -923,7 +943,8 @@ static ssize_t nfsd4_write_time(struct file *file, char *buf, size_t size, */ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) { - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_lease, nn); } @@ -939,7 +960,8 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size) */ static ssize_t write_gracetime(struct file *file, char *buf, size_t size) { - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); return nfsd4_write_time(file, buf, size, &nn->nfsd4_grace, nn); } @@ -995,7 +1017,8 @@ static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size, static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) { ssize_t rv; - struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id); + struct net *net = file->f_dentry->d_sb->s_fs_info; + struct nfsd_net *nn = net_generic(net, nfsd_net_id); mutex_lock(&nfsd_mutex); rv = __write_recoverydir(file, buf, size, nn); @@ -1013,7 +1036,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { static struct tree_descr nfsd_files[] = { - [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, + [NFSD_List] = {"exports", &exports_nfsd_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_operations, S_IRUGO}, [NFSD_FO_UnlockIP] = {"unlock_ip", @@ -1037,20 +1060,35 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) #endif /* last one */ {""} }; - return simple_fill_super(sb, 0x6e667364, nfsd_files); + struct net *net = data; + int ret; + + ret = simple_fill_super(sb, 0x6e667364, nfsd_files); + if (ret) + return ret; + sb->s_fs_info = get_net(net); + return 0; } static struct dentry *nfsd_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - return mount_single(fs_type, flags, data, nfsd_fill_super); + return mount_ns(fs_type, flags, current->nsproxy->net_ns, nfsd_fill_super); +} + +static void nfsd_umount(struct super_block *sb) +{ + struct net *net = sb->s_fs_info; + + kill_litter_super(sb); + put_net(net); } static struct file_system_type nfsd_fs_type = { .owner = THIS_MODULE, .name = "nfsd", .mount = nfsd_mount, - .kill_sb = kill_litter_super, + .kill_sb = nfsd_umount, }; #ifdef CONFIG_PROC_FS @@ -1061,7 +1099,8 @@ static int create_proc_exports_entry(void) entry = proc_mkdir("fs/nfs", NULL); if (!entry) return -ENOMEM; - entry = proc_create("exports", 0, entry, &exports_operations); + entry = proc_create("exports", 0, entry, + &exports_proc_operations); if (!entry) return -ENOMEM; return 0; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index be7af509930..262df5ccbf5 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -652,7 +652,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) /* Check whether we have this call in the cache. */ switch (nfsd_cache_lookup(rqstp)) { - case RC_INTR: case RC_DROPIT: return 0; case RC_REPLY: @@ -703,8 +702,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) int nfsd_pool_stats_open(struct inode *inode, struct file *file) { int ret; - struct net *net = &init_net; - struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct nfsd_net *nn = net_generic(inode->i_sb->s_fs_info, nfsd_net_id); mutex_lock(&nfsd_mutex); if (nn->nfsd_serv == NULL) { @@ -721,7 +719,7 @@ int nfsd_pool_stats_open(struct inode *inode, struct file *file) int nfsd_pool_stats_release(struct inode *inode, struct file *file) { int ret = seq_release(inode, file); - struct net *net = &init_net; + struct net *net = inode->i_sb->s_fs_info; mutex_lock(&nfsd_mutex); /* this function really, really should have been called svc_put() */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 0889bfb43dc..546f8983ecf 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -563,7 +563,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, __be32 *buffer, int *countp, + struct dentry *dentry, __be32 **buffer, int countp, u32 *bmval, struct svc_rqst *, int ignore_crossmnt); extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h new file mode 100644 index 00000000000..720446cb243 --- /dev/null +++ b/include/acpi/ghes.h @@ -0,0 +1,72 @@ +#include <acpi/apei.h> +#include <acpi/hed.h> + +/* + * One struct ghes is created for each generic hardware error source. + * It provides the context for APEI hardware error timer/IRQ/SCI/NMI + * handler. + * + * estatus: memory buffer for error status block, allocated during + * HEST parsing. + */ +#define GHES_TO_CLEAR 0x0001 +#define GHES_EXITING 0x0002 + +struct ghes { + struct acpi_hest_generic *generic; + struct acpi_hest_generic_status *estatus; + u64 buffer_paddr; + unsigned long flags; + union { + struct list_head list; + struct timer_list timer; + unsigned int irq; + }; +}; + +struct ghes_estatus_node { + struct llist_node llnode; + struct acpi_hest_generic *generic; + struct ghes *ghes; +}; + +struct ghes_estatus_cache { + u32 estatus_len; + atomic_t count; + struct acpi_hest_generic *generic; + unsigned long long time_in; + struct rcu_head rcu; +}; + +enum { + GHES_SEV_NO = 0x0, + GHES_SEV_CORRECTED = 0x1, + GHES_SEV_RECOVERABLE = 0x2, + GHES_SEV_PANIC = 0x3, +}; + +/* From drivers/edac/ghes_edac.c */ + +#ifdef CONFIG_EDAC_GHES +void ghes_edac_report_mem_error(struct ghes *ghes, int sev, + struct cper_sec_mem_err *mem_err); + +int ghes_edac_register(struct ghes *ghes, struct device *dev); + +void ghes_edac_unregister(struct ghes *ghes); + +#else +static inline void ghes_edac_report_mem_error(struct ghes *ghes, int sev, + struct cper_sec_mem_err *mem_err) +{ +} + +static inline int ghes_edac_register(struct ghes *ghes, struct device *dev) +{ + return 0; +} + +static inline void ghes_edac_unregister(struct ghes *ghes) +{ +} +#endif diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f94bc83011e..78feda9bbae 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -19,6 +19,7 @@ #include <linux/gfp.h> #include <linux/bsg.h> #include <linux/smp.h> +#include <linux/rcupdate.h> #include <asm/scatterlist.h> @@ -437,6 +438,7 @@ struct request_queue { /* Throttle data */ struct throtl_data *td; #endif + struct rcu_head rcu_head; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ @@ -974,7 +976,6 @@ struct blk_plug { unsigned long magic; /* detect uninitialized use-cases */ struct list_head list; /* requests */ struct list_head cb_list; /* md requires an unplug callback */ - unsigned int should_sort; /* list to be sorted before flushing? */ }; #define BLK_MAX_REQUEST_COUNT 16 diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 7c2e030e72f..0ea61e07a91 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -12,6 +12,7 @@ struct blk_trace { int trace_state; + bool rq_based; struct rchan *rchan; unsigned long __percpu *sequence; unsigned char __percpu *msg_data; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 458f497738a..5afc4f94d11 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -126,7 +126,6 @@ BUFFER_FNS(Write_EIO, write_io_error) BUFFER_FNS(Unwritten, unwritten) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) -#define touch_buffer(bh) mark_page_accessed(bh->b_page) /* If we *know* page->private refers to buffer_heads */ #define page_buffers(page) \ @@ -142,6 +141,7 @@ BUFFER_FNS(Unwritten, unwritten) void mark_buffer_dirty(struct buffer_head *bh); void init_buffer(struct buffer_head *, bh_end_io_t *, void *); +void touch_buffer(struct buffer_head *bh); void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); int try_to_free_buffers(struct page *); diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index dad579b0c0e..76554cecaab 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -12,16 +12,46 @@ #define CEPH_FEATURE_MONNAMES (1<<5) #define CEPH_FEATURE_RECONNECT_SEQ (1<<6) #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) -/* bits 8-17 defined by user-space; not supported yet here */ +#define CEPH_FEATURE_OBJECTLOCATOR (1<<8) +#define CEPH_FEATURE_PGID64 (1<<9) +#define CEPH_FEATURE_INCSUBOSDMAP (1<<10) +#define CEPH_FEATURE_PGPOOL3 (1<<11) +#define CEPH_FEATURE_OSDREPLYMUX (1<<12) +#define CEPH_FEATURE_OSDENC (1<<13) +#define CEPH_FEATURE_OMAP (1<<14) +#define CEPH_FEATURE_MONENC (1<<15) +#define CEPH_FEATURE_QUERY_T (1<<16) +#define CEPH_FEATURE_INDEP_PG_MAP (1<<17) #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) +#define CEPH_FEATURE_CHUNKY_SCRUB (1<<19) +#define CEPH_FEATURE_MON_NULLROUTE (1<<20) +#define CEPH_FEATURE_MON_GV (1<<21) +#define CEPH_FEATURE_BACKFILL_RESERVATION (1<<22) +#define CEPH_FEATURE_MSG_AUTH (1<<23) +#define CEPH_FEATURE_RECOVERY_RESERVATION (1<<24) +#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25) +#define CEPH_FEATURE_CREATEPOOLID (1<<26) +#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) +#define CEPH_FEATURE_OSD_HBMSGS (1<<28) +#define CEPH_FEATURE_MDSENC (1<<29) +#define CEPH_FEATURE_OSDHASHPSPOOL (1<<30) /* * Features supported. */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ - (CEPH_FEATURE_NOSRCADDR | \ - CEPH_FEATURE_CRUSH_TUNABLES) + (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_PGID64 | \ + CEPH_FEATURE_PGPOOL3 | \ + CEPH_FEATURE_OSDENC | \ + CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_CRUSH_TUNABLES2 | \ + CEPH_FEATURE_REPLY_CREATE_INODE | \ + CEPH_FEATURE_OSDHASHPSPOOL) #define CEPH_FEATURES_REQUIRED_DEFAULT \ - (CEPH_FEATURE_NOSRCADDR) + (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_PGID64 | \ + CEPH_FEATURE_PGPOOL3 | \ + CEPH_FEATURE_OSDENC) #endif diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index cf6f4d998a7..2ad7b860f06 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -21,16 +21,14 @@ * internal cluster protocols separately from the public, * client-facing protocol. */ -#define CEPH_OSD_PROTOCOL 8 /* cluster internal */ -#define CEPH_MDS_PROTOCOL 12 /* cluster internal */ -#define CEPH_MON_PROTOCOL 5 /* cluster internal */ #define CEPH_OSDC_PROTOCOL 24 /* server/client */ #define CEPH_MDSC_PROTOCOL 32 /* server/client */ #define CEPH_MONC_PROTOCOL 15 /* server/client */ -#define CEPH_INO_ROOT 1 -#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_ROOT 1 +#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 @@ -51,7 +49,7 @@ struct ceph_file_layout { __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ /* object -> pg layout */ - __le32 fl_unused; /* unused; used to be preferred primary (-1) */ + __le32 fl_unused; /* unused; used to be preferred primary for pg (-1 for none) */ __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ } __attribute__ ((packed)); @@ -101,6 +99,8 @@ struct ceph_dir_layout { #define CEPH_MSG_MON_SUBSCRIBE_ACK 16 #define CEPH_MSG_AUTH 17 #define CEPH_MSG_AUTH_REPLY 18 +#define CEPH_MSG_MON_GET_VERSION 19 +#define CEPH_MSG_MON_GET_VERSION_REPLY 20 /* client <-> mds */ #define CEPH_MSG_MDS_MAP 21 @@ -221,6 +221,11 @@ struct ceph_mon_subscribe_ack { } __attribute__ ((packed)); /* + * mdsmap flags + */ +#define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */ + +/* * mds states * > 0 -> in * <= 0 -> out @@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack { #define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ #define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ #define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ +#define CEPH_MDS_STATE_REPLAYONCE -9 /* up, replaying an active node's journal */ #define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ #define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed @@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s); #define CEPH_LOCK_IXATTR 2048 #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ +#define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */ /* client_session ops */ enum { @@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_SETATTR_SIZE 32 #define CEPH_SETATTR_CTIME 64 +/* + * Ceph setxattr request flags. + */ +#define CEPH_XATTR_CREATE 1 +#define CEPH_XATTR_REPLACE 2 + union ceph_mds_request_args { struct { __le32 mask; /* CEPH_CAP_* */ @@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags); #define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ #define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ +#define CEPH_CAP_SIMPLE_BITS 2 +#define CEPH_CAP_FILE_BITS 8 + /* per-lock shift */ #define CEPH_CAP_SAUTH 2 #define CEPH_CAP_SLINK 4 #define CEPH_CAP_SXATTR 6 #define CEPH_CAP_SFILE 8 -#define CEPH_CAP_SFLOCK 20 +#define CEPH_CAP_SFLOCK 20 -#define CEPH_CAP_BITS 22 +#define CEPH_CAP_BITS 22 /* composed values */ #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 63d092822ba..360d9d08ca9 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -52,10 +52,10 @@ static inline int ceph_has_room(void **p, void *end, size_t n) return end >= *p && n <= end - *p; } -#define ceph_decode_need(p, end, n, bad) \ - do { \ - if (!likely(ceph_has_room(p, end, n))) \ - goto bad; \ +#define ceph_decode_need(p, end, n, bad) \ + do { \ + if (!likely(ceph_has_room(p, end, n))) \ + goto bad; \ } while (0) #define ceph_decode_64_safe(p, end, v, bad) \ @@ -99,8 +99,8 @@ static inline int ceph_has_room(void **p, void *end, size_t n) * * There are two possible failures: * - converting the string would require accessing memory at or - * beyond the "end" pointer provided (-E - * - memory could not be allocated for the result + * beyond the "end" pointer provided (-ERANGE) + * - memory could not be allocated for the result (-ENOMEM) */ static inline char *ceph_extract_encoded_string(void **p, void *end, size_t *lenp, gfp_t gfp) @@ -217,10 +217,10 @@ static inline void ceph_encode_string(void **p, void *end, *p += len; } -#define ceph_encode_need(p, end, n, bad) \ - do { \ - if (!likely(ceph_has_room(p, end, n))) \ - goto bad; \ +#define ceph_encode_need(p, end, n, bad) \ + do { \ + if (!likely(ceph_has_room(p, end, n))) \ + goto bad; \ } while (0) #define ceph_encode_64_safe(p, end, v, bad) \ @@ -231,12 +231,17 @@ static inline void ceph_encode_string(void **p, void *end, #define ceph_encode_32_safe(p, end, v, bad) \ do { \ ceph_encode_need(p, end, sizeof(u32), bad); \ - ceph_encode_32(p, v); \ + ceph_encode_32(p, v); \ } while (0) #define ceph_encode_16_safe(p, end, v, bad) \ do { \ ceph_encode_need(p, end, sizeof(u16), bad); \ - ceph_encode_16(p, v); \ + ceph_encode_16(p, v); \ + } while (0) +#define ceph_encode_8_safe(p, end, v, bad) \ + do { \ + ceph_encode_need(p, end, sizeof(u8), bad); \ + ceph_encode_8(p, v); \ } while (0) #define ceph_encode_copy_safe(p, end, pv, n, bad) \ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 084d3c622b1..29818fc3fa4 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len) } /* ceph_common.c */ +extern bool libceph_compatible(void *data); + extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); extern struct kmem_cache *ceph_inode_cachep; @@ -220,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client); /* pagevec.c */ extern void ceph_release_page_vector(struct page **pages, int num_pages); -extern struct page **ceph_get_direct_page_vector(const char __user *data, +extern struct page **ceph_get_direct_page_vector(const void __user *data, int num_pages, bool write_page); extern void ceph_put_page_vector(struct page **pages, int num_pages, @@ -228,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages, extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, + const void __user *data, loff_t off, size_t len); -extern int ceph_copy_to_page_vector(struct page **pages, - const char *data, +extern void ceph_copy_to_page_vector(struct page **pages, + const void *data, loff_t off, size_t len); -extern int ceph_copy_from_page_vector(struct page **pages, - char *data, +extern void ceph_copy_from_page_vector(struct page **pages, + void *data, loff_t off, size_t len); -extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, +extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data, loff_t off, size_t len); extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index cb15b5d867c..87ed09f5480 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -29,8 +29,8 @@ struct ceph_mdsmap { /* which object pools file data can be stored in */ int m_num_data_pg_pools; - u32 *m_data_pg_pools; - u32 m_cas_pg_pool; + u64 *m_data_pg_pools; + u64 m_cas_pg_pool; }; static inline struct ceph_entity_addr * diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 14ba5ee738a..60903e0f665 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -83,9 +83,11 @@ struct ceph_msg { struct list_head list_head; struct kref kref; +#ifdef CONFIG_BLOCK struct bio *bio; /* instead of pages/pagelist */ struct bio *bio_iter; /* bio iterator */ int bio_seg; /* current bio segment */ +#endif /* CONFIG_BLOCK */ struct ceph_pagelist *trail; /* the trailing part of the data */ bool front_is_vmalloc; bool more_to_follow; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d9b880e977e..1dd5d466b6f 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -10,6 +10,7 @@ #include <linux/ceph/osdmap.h> #include <linux/ceph/messenger.h> #include <linux/ceph/auth.h> +#include <linux/ceph/pagelist.h> /* * Maximum object name size @@ -22,7 +23,6 @@ struct ceph_snap_context; struct ceph_osd_request; struct ceph_osd_client; struct ceph_authorizer; -struct ceph_pagelist; /* * completion callback for async writepages @@ -47,6 +47,9 @@ struct ceph_osd { struct list_head o_keepalive_item; }; + +#define CEPH_OSD_MAX_OP 10 + /* an in-flight request */ struct ceph_osd_request { u64 r_tid; /* unique for this client */ @@ -63,9 +66,23 @@ struct ceph_osd_request { struct ceph_connection *r_con_filling_msg; struct ceph_msg *r_request, *r_reply; - int r_result; int r_flags; /* any additional flags for the osd */ u32 r_sent; /* >0 if r_request is sending/sent */ + int r_num_ops; + + /* encoded message content */ + struct ceph_osd_op *r_request_ops; + /* these are updated on each send */ + __le32 *r_request_osdmap_epoch; + __le32 *r_request_flags; + __le64 *r_request_pool; + void *r_request_pgid; + __le32 *r_request_attempts; + struct ceph_eversion *r_request_reassert_version; + + int r_result; + int r_reply_op_len[CEPH_OSD_MAX_OP]; + s32 r_reply_op_result[CEPH_OSD_MAX_OP]; int r_got_reply; int r_linger; @@ -82,6 +99,7 @@ struct ceph_osd_request { char r_oid[MAX_OBJ_NAME_SIZE]; /* object name */ int r_oid_len; + u64 r_snapid; unsigned long r_stamp; /* send OR check time */ struct ceph_file_layout r_file_layout; @@ -95,7 +113,7 @@ struct ceph_osd_request { struct bio *r_bio; /* instead of pages */ #endif - struct ceph_pagelist *r_trail; /* trailing part of the data */ + struct ceph_pagelist r_trail; /* trailing part of the data */ }; struct ceph_osd_event { @@ -107,7 +125,6 @@ struct ceph_osd_event { struct rb_node node; struct list_head osd_node; struct kref kref; - struct completion completion; }; struct ceph_osd_event_work { @@ -157,7 +174,7 @@ struct ceph_osd_client { struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ - u32 flags; /* CEPH_OSD_FLAG_* */ + u32 payload_len; union { struct { u64 offset, length; @@ -166,23 +183,24 @@ struct ceph_osd_req_op { } extent; struct { const char *name; - u32 name_len; const char *val; + u32 name_len; u32 value_len; __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ } xattr; struct { const char *class_name; - __u8 class_len; const char *method_name; - __u8 method_len; - __u8 argc; const char *indata; u32 indata_len; + __u8 class_len; + __u8 method_len; + __u8 argc; } cls; struct { - u64 cookie, count; + u64 cookie; + u64 count; } pgls; struct { u64 snapid; @@ -190,12 +208,11 @@ struct ceph_osd_req_op { struct { u64 cookie; u64 ver; - __u8 flag; u32 prot_ver; u32 timeout; + __u8 flag; } watch; }; - u32 payload_len; }; extern int ceph_osdc_init(struct ceph_osd_client *osdc, @@ -207,29 +224,19 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); -extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, - u64 snapid, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op); - extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, + unsigned int num_op, bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio); + gfp_t gfp_flags); extern void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, + u64 off, u64 len, + unsigned int num_op, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len); + u64 snap_id, + struct timespec *mtime); extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, @@ -239,8 +246,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, int do_sync, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply, - int page_align); + bool use_mempool, int page_align); extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, struct ceph_osd_request *req); @@ -279,17 +285,13 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - struct page **pages, int nr_pages, - int flags, int do_sync, bool nofail); + struct page **pages, int nr_pages); /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), - int one_shot, void *data, - struct ceph_osd_event **pevent); + void *data, struct ceph_osd_event **pevent); extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); -extern int ceph_osdc_wait_event(struct ceph_osd_event *event, - unsigned long timeout); extern void ceph_osdc_put_event(struct ceph_osd_event *event); #endif diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 10a417f9f76..c819190d164 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -18,14 +18,31 @@ * The map can be updated either via an incremental map (diff) describing * the change between two successive epochs, or as a fully encoded map. */ +struct ceph_pg { + uint64_t pool; + uint32_t seed; +}; + +#define CEPH_POOL_FLAG_HASHPSPOOL 1 + struct ceph_pg_pool_info { struct rb_node node; - int id; - struct ceph_pg_pool v; - int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask; + s64 id; + u8 type; + u8 size; + u8 crush_ruleset; + u8 object_hash; + u32 pg_num, pgp_num; + int pg_num_mask, pgp_num_mask; + u64 flags; char *name; }; +struct ceph_object_locator { + uint64_t pool; + char *key; +}; + struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; @@ -110,15 +127,16 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map); /* calculate mapping of a file extent to an object */ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 *plen, + u64 off, u64 len, u64 *bno, u64 *oxoff, u64 *oxlen); /* calculate mapping of object to a placement group */ -extern int ceph_calc_object_layout(struct ceph_object_layout *ol, +extern int ceph_calc_object_layout(struct ceph_pg *pg, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap); -extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, +extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, + struct ceph_pg pgid, int *acting); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2c04afeead1..68c96a508ac 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -9,14 +9,6 @@ #include <linux/ceph/msgr.h> /* - * osdmap encoding versions - */ -#define CEPH_OSDMAP_INC_VERSION 5 -#define CEPH_OSDMAP_INC_VERSION_EXT 6 -#define CEPH_OSDMAP_VERSION 5 -#define CEPH_OSDMAP_VERSION_EXT 6 - -/* * fs id */ struct ceph_fsid { @@ -64,7 +56,7 @@ struct ceph_timespec { * placement group. * we encode this into one __le64. */ -struct ceph_pg { +struct ceph_pg_v1 { __le16 preferred; /* preferred primary osd */ __le16 ps; /* placement seed */ __le32 pool; /* object pool */ @@ -91,21 +83,6 @@ struct ceph_pg { #define CEPH_PG_TYPE_REP 1 #define CEPH_PG_TYPE_RAID4 2 -#define CEPH_PG_POOL_VERSION 2 -struct ceph_pg_pool { - __u8 type; /* CEPH_PG_TYPE_* */ - __u8 size; /* number of osds in each pg */ - __u8 crush_ruleset; /* crush placement rule */ - __u8 object_hash; /* hash mapping object name to ps */ - __le32 pg_num, pgp_num; /* number of pg's */ - __le32 lpg_num, lpgp_num; /* number of localized pg's */ - __le32 last_change; /* most recent epoch changed */ - __le64 snap_seq; /* seq for per-pool snapshot */ - __le32 snap_epoch; /* epoch of last snap */ - __le32 num_snaps; - __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ - __le64 auid; /* who owns the pg */ -} __attribute__ ((packed)); /* * stable_mod func is used to control number of placement groups. @@ -128,7 +105,7 @@ static inline int ceph_stable_mod(int x, int b, int bmask) * object layout - how a given object should be stored. */ struct ceph_object_layout { - struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */ + struct ceph_pg_v1 ol_pgid; /* raw pg, with _full_ ps precision. */ __le32 ol_stripe_unit; /* for per-object parity, if any */ } __attribute__ ((packed)); @@ -145,8 +122,12 @@ struct ceph_eversion { */ /* status bits */ -#define CEPH_OSD_EXISTS 1 -#define CEPH_OSD_UP 2 +#define CEPH_OSD_EXISTS (1<<0) +#define CEPH_OSD_UP (1<<1) +#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ +#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ + +extern const char *ceph_osd_state_name(int s); /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ #define CEPH_OSD_IN 0x10000 @@ -161,9 +142,25 @@ struct ceph_eversion { #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ +#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */ +#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */ +#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */ +#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */ +#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */ +#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */ + +/* + * The error code to return when an OSD can't handle a write + * because it is too large. + */ +#define OSD_WRITETOOBIG EMSGSIZE /* * osd ops + * + * WARNING: do not use these op codes directly. Use the helpers + * defined below instead. In certain cases, op code behavior was + * redefined, resulting in special-cases in the helpers. */ #define CEPH_OSD_OP_MODE 0xf000 #define CEPH_OSD_OP_MODE_RD 0x1000 @@ -177,6 +174,7 @@ struct ceph_eversion { #define CEPH_OSD_OP_TYPE_ATTR 0x0300 #define CEPH_OSD_OP_TYPE_EXEC 0x0400 #define CEPH_OSD_OP_TYPE_PG 0x0500 +#define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */ enum { /** data **/ @@ -217,6 +215,23 @@ enum { CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, + /* omap */ + CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17, + CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18, + CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19, + CEPH_OSD_OP_OMAPGETVALSBYKEYS = + CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20, + CEPH_OSD_OP_OMAPSETVALS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21, + CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22, + CEPH_OSD_OP_OMAPCLEAR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23, + CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, + CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, + + /** multi **/ + CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, + CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, + CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3, + /** attrs **/ /* read */ CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, @@ -238,6 +253,7 @@ enum { CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, + CEPH_OSD_OP_SCRUB_MAP = CEPH_OSD_OP_MODE_SUB | 9, /** lock **/ CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, @@ -248,10 +264,12 @@ enum { CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, /** exec **/ + /* note: the RD bit here is wrong; see special-case below in helper */ CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, /** pg **/ CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, + CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2, }; static inline int ceph_osd_op_type_lock(int op) @@ -274,6 +292,10 @@ static inline int ceph_osd_op_type_pg(int op) { return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; } +static inline int ceph_osd_op_type_multi(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI; +} static inline int ceph_osd_op_mode_subop(int op) { @@ -281,11 +303,12 @@ static inline int ceph_osd_op_mode_subop(int op) } static inline int ceph_osd_op_mode_read(int op) { - return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; + return (op & CEPH_OSD_OP_MODE_RD) && + op != CEPH_OSD_OP_CALL; } static inline int ceph_osd_op_mode_modify(int op) { - return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; + return op & CEPH_OSD_OP_MODE_WR; } /* @@ -294,34 +317,38 @@ static inline int ceph_osd_op_mode_modify(int op) */ #define CEPH_OSD_TMAP_HDR 'h' #define CEPH_OSD_TMAP_SET 's' +#define CEPH_OSD_TMAP_CREATE 'c' /* create key */ #define CEPH_OSD_TMAP_RM 'r' +#define CEPH_OSD_TMAP_RMSLOPPY 'R' extern const char *ceph_osd_op_name(int op); - /* * osd op flags * * An op may be READ, WRITE, or READ|WRITE. */ enum { - CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ - CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ - CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ - CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ - CEPH_OSD_FLAG_READ = 16, /* op may read */ - CEPH_OSD_FLAG_WRITE = 32, /* op may write */ - CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ - CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ - CEPH_OSD_FLAG_BALANCE_READS = 256, - CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ - CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ - CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ - CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ + CEPH_OSD_FLAG_ACK = 0x0001, /* want (or is) "ack" ack */ + CEPH_OSD_FLAG_ONNVRAM = 0x0002, /* want (or is) "onnvram" ack */ + CEPH_OSD_FLAG_ONDISK = 0x0004, /* want (or is) "ondisk" ack */ + CEPH_OSD_FLAG_RETRY = 0x0008, /* resend attempt */ + CEPH_OSD_FLAG_READ = 0x0010, /* op may read */ + CEPH_OSD_FLAG_WRITE = 0x0020, /* op may write */ + CEPH_OSD_FLAG_ORDERSNAP = 0x0040, /* EOLDSNAP if snapc is out of order */ + CEPH_OSD_FLAG_PEERSTAT_OLD = 0x0080, /* DEPRECATED msg includes osd_peer_stat */ + CEPH_OSD_FLAG_BALANCE_READS = 0x0100, + CEPH_OSD_FLAG_PARALLELEXEC = 0x0200, /* execute op in parallel */ + CEPH_OSD_FLAG_PGOP = 0x0400, /* pg op, no object */ + CEPH_OSD_FLAG_EXEC = 0x0800, /* op may exec */ + CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */ + CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */ + CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */ }; enum { CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ + CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */ }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ @@ -381,48 +408,13 @@ struct ceph_osd_op { __le64 ver; __u8 flag; /* 0 = unwatch, 1 = watch */ } __attribute__ ((packed)) watch; -}; + struct { + __le64 offset, length; + __le64 src_offset; + } __attribute__ ((packed)) clonerange; + }; __le32 payload_len; } __attribute__ ((packed)); -/* - * osd request message header. each request may include multiple - * ceph_osd_op object operations. - */ -struct ceph_osd_request_head { - __le32 client_inc; /* client incarnation */ - struct ceph_object_layout layout; /* pgid */ - __le32 osdmap_epoch; /* client's osdmap epoch */ - - __le32 flags; - - struct ceph_timespec mtime; /* for mutations only */ - struct ceph_eversion reassert_version; /* if we are replaying op */ - - __le32 object_len; /* length of object name */ - - __le64 snapid; /* snapid to read */ - __le64 snap_seq; /* writer's snap context */ - __le32 num_snaps; - - __le16 num_ops; - struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */ -} __attribute__ ((packed)); - -struct ceph_osd_reply_head { - __le32 client_inc; /* client incarnation */ - __le32 flags; - struct ceph_object_layout layout; - __le32 osdmap_epoch; - struct ceph_eversion reassert_version; /* for replaying uncommitted */ - - __le32 result; /* result code */ - - __le32 object_len; /* length of object name */ - __le32 num_ops; - struct ceph_osd_op ops[0]; /* ops[], object */ -} __attribute__ ((packed)); - - #endif diff --git a/include/linux/completion.h b/include/linux/completion.h index 51494e6b554..33f0280fd53 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -77,10 +77,13 @@ static inline void init_completion(struct completion *x) } extern void wait_for_completion(struct completion *); +extern void wait_for_completion_io(struct completion *); extern int wait_for_completion_interruptible(struct completion *x); extern int wait_for_completion_killable(struct completion *x); extern unsigned long wait_for_completion_timeout(struct completion *x, unsigned long timeout); +extern unsigned long wait_for_completion_io_timeout(struct completion *x, + unsigned long timeout); extern long wait_for_completion_interruptible_timeout( struct completion *x, unsigned long timeout); extern long wait_for_completion_killable_timeout( diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 25baa287cff..6a1101f24cf 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -162,6 +162,8 @@ struct crush_map { __u32 choose_local_fallback_tries; /* choose attempts before giving up */ __u32 choose_total_tries; + /* attempt chooseleaf inner descent once; on failure retry outer descent */ + __u32 chooseleaf_descend_once; }; diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 3d754a394e9..9978b614a1a 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -119,8 +119,10 @@ struct dma_buf { struct file *file; struct list_head attachments; const struct dma_buf_ops *ops; - /* mutex to serialize list manipulation and attach/detach */ + /* mutex to serialize list manipulation, attach/detach and vmap/unmap */ struct mutex lock; + unsigned vmapping_counter; + void *vmap_ptr; void *priv; }; diff --git a/include/linux/edac.h b/include/linux/edac.h index 1b8c02b36f7..4fd4999ccb5 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -14,7 +14,6 @@ #include <linux/atomic.h> #include <linux/device.h> -#include <linux/kobject.h> #include <linux/completion.h> #include <linux/workqueue.h> #include <linux/debugfs.h> @@ -48,8 +47,17 @@ static inline void opstate_init(void) return; } +/* Max length of a DIMM label*/ #define EDAC_MC_LABEL_LEN 31 -#define MC_PROC_NAME_MAX_LEN 7 + +/* Maximum size of the location string */ +#define LOCATION_SIZE 80 + +/* Defines the maximum number of labels that can be reported */ +#define EDAC_MAX_LABELS 8 + +/* String used to join two or more labels */ +#define OTHER_LABEL " or " /** * enum dev_type - describe the type of memory DRAM chips used at the stick @@ -101,8 +109,24 @@ enum hw_event_mc_err_type { HW_EVENT_ERR_CORRECTED, HW_EVENT_ERR_UNCORRECTED, HW_EVENT_ERR_FATAL, + HW_EVENT_ERR_INFO, }; +static inline char *mc_event_error_type(const unsigned int err_type) +{ + switch (err_type) { + case HW_EVENT_ERR_CORRECTED: + return "Corrected"; + case HW_EVENT_ERR_UNCORRECTED: + return "Uncorrected"; + case HW_EVENT_ERR_FATAL: + return "Fatal"; + default: + case HW_EVENT_ERR_INFO: + return "Info"; + } +} + /** * enum mem_type - memory types. For a more detailed reference, please see * http://en.wikipedia.org/wiki/DRAM @@ -376,6 +400,9 @@ enum scrub_type { * @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel" * @EDAC_MC_LAYER_SLOT: memory layer is named "slot" * @EDAC_MC_LAYER_CHIP_SELECT: memory layer is named "chip select" + * @EDAC_MC_LAYER_ALL_MEM: memory layout is unknown. All memory is mapped + * as a single memory area. This is used when + * retrieving errors from a firmware driven driver. * * This enum is used by the drivers to tell edac_mc_sysfs what name should * be used when describing a memory stick location. @@ -385,6 +412,7 @@ enum edac_mc_layer_type { EDAC_MC_LAYER_CHANNEL, EDAC_MC_LAYER_SLOT, EDAC_MC_LAYER_CHIP_SELECT, + EDAC_MC_LAYER_ALL_MEM, }; /** @@ -551,6 +579,46 @@ struct errcount_attribute_data { int layer0, layer1, layer2; }; +/** + * edac_raw_error_desc - Raw error report structure + * @grain: minimum granularity for an error report, in bytes + * @error_count: number of errors of the same type + * @top_layer: top layer of the error (layer[0]) + * @mid_layer: middle layer of the error (layer[1]) + * @low_layer: low layer of the error (layer[2]) + * @page_frame_number: page where the error happened + * @offset_in_page: page offset + * @syndrome: syndrome of the error (or 0 if unknown or if + * the syndrome is not applicable) + * @msg: error message + * @location: location of the error + * @label: label of the affected DIMM(s) + * @other_detail: other driver-specific detail about the error + * @enable_per_layer_report: if false, the error affects all layers + * (typically, a memory controller error) + */ +struct edac_raw_error_desc { + /* + * NOTE: everything before grain won't be cleaned by + * edac_raw_error_desc_clean() + */ + char location[LOCATION_SIZE]; + char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * EDAC_MAX_LABELS]; + long grain; + + /* the vars below and grain will be cleaned on every new error report */ + u16 error_count; + int top_layer; + int mid_layer; + int low_layer; + unsigned long page_frame_number; + unsigned long offset_in_page; + unsigned long syndrome; + const char *msg; + const char *other_detail; + bool enable_per_layer_report; +}; + /* MEMORY controller information structure */ struct mem_ctl_info { @@ -630,7 +698,6 @@ struct mem_ctl_info { const char *mod_ver; const char *ctl_name; const char *dev_name; - char proc_name[MC_PROC_NAME_MAX_LEN + 1]; void *pvt_info; unsigned long start_time; /* mci load start time (in jiffies) */ @@ -659,6 +726,12 @@ struct mem_ctl_info { /* work struct for this MC */ struct delayed_work work; + /* + * Used to report an error - by being at the global struct + * makes the memory allocated by the EDAC core + */ + struct edac_raw_error_desc error_desc; + /* the internal state of this controller instance */ int op_state; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 18662063175..acd0312d46f 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -2,6 +2,7 @@ #define _LINUX_ELEVATOR_H #include <linux/percpu.h> +#include <linux/hashtable.h> #ifdef CONFIG_BLOCK @@ -96,6 +97,8 @@ struct elevator_type struct list_head list; }; +#define ELV_HASH_BITS 6 + /* * each queue has an elevator_queue associated with it */ @@ -105,8 +108,8 @@ struct elevator_queue void *elevator_data; struct kobject kobj; struct mutex sysfs_lock; - struct hlist_head *hash; unsigned int registered:1; + DECLARE_HASHTABLE(hash, ELV_HASH_BITS); }; /* diff --git a/include/linux/llist.h b/include/linux/llist.h index d0ab98f73d3..a5199f6d0e8 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -125,31 +125,6 @@ static inline void init_llist_head(struct llist_head *list) (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) /** - * llist_for_each_entry_safe - iterate safely against remove over some entries - * of lock-less list of given type. - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as a temporary storage. - * @node: the fist entry of deleted list entries. - * @member: the name of the llist_node with the struct. - * - * In general, some entries of the lock-less list can be traversed - * safely only after being removed from list, so start with an entry - * instead of list head. This variant allows removal of entries - * as we iterate. - * - * If being used on entries deleted from lock-less list directly, the - * traverse order is from the newest to the oldest added entry. If - * you want to traverse from the oldest to the newest, you must - * reverse the order by yourself before traversing. - */ -#define llist_for_each_entry_safe(pos, n, node, member) \ - for ((pos) = llist_entry((node), typeof(*(pos)), member), \ - (n) = (pos)->member.next; \ - &(pos)->member != NULL; \ - (pos) = llist_entry(n, typeof(*(pos)), member), \ - (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL) - -/** * llist_empty - tests whether a lock-less list is empty * @head: the list to test * diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 0e62d84f9f7..dcaad79f54e 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -212,7 +212,8 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock); void nlmclnt_recovery(struct nlm_host *); -int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); +int nlmclnt_reclaim(struct nlm_host *, struct file_lock *, + struct nlm_rqst *); void nlmclnt_next_cookie(struct nlm_cookie *); /* diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 31717bd287f..f11c1c2609d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2802,6 +2802,7 @@ #define PCI_DEVICE_ID_INTEL_JAKETOWN_UBOX 0x3ce0 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 +#define PCI_DEVICE_ID_INTEL_5100_19 0x65f3 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 #define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 #define PCI_DEVICE_ID_INTEL_5400_ERR 0x4030 diff --git a/include/linux/platform_data/exynos_thermal.h b/include/linux/platform_data/exynos_thermal.h index a7bdb2f63b7..da7e6274b17 100644 --- a/include/linux/platform_data/exynos_thermal.h +++ b/include/linux/platform_data/exynos_thermal.h @@ -53,6 +53,8 @@ struct freq_clip_table { * struct exynos_tmu_platform_data * @threshold: basic temperature for generating interrupt * 25 <= threshold <= 125 [unit: degree Celsius] + * @threshold_falling: differntial value for setting threshold + * of temperature falling interrupt. * @trigger_levels: array for each interrupt levels * [unit: degree Celsius] * 0: temperature for trigger_level0 interrupt @@ -97,6 +99,7 @@ struct freq_clip_table { */ struct exynos_tmu_platform_data { u8 threshold; + u8 threshold_falling; u8 trigger_levels[4]; bool trigger_level0_en; bool trigger_level1_en; diff --git a/include/linux/sunrpc/addr.h b/include/linux/sunrpc/addr.h new file mode 100644 index 00000000000..07d8e53bedf --- /dev/null +++ b/include/linux/sunrpc/addr.h @@ -0,0 +1,170 @@ +/* + * linux/include/linux/sunrpc/addr.h + * + * Various routines for copying and comparing sockaddrs and for + * converting them to and from presentation format. + */ +#ifndef _LINUX_SUNRPC_ADDR_H +#define _LINUX_SUNRPC_ADDR_H + +#include <linux/socket.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <net/ipv6.h> + +size_t rpc_ntop(const struct sockaddr *, char *, const size_t); +size_t rpc_pton(struct net *, const char *, const size_t, + struct sockaddr *, const size_t); +char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); +size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, + struct sockaddr *, const size_t); + +static inline unsigned short rpc_get_port(const struct sockaddr *sap) +{ + switch (sap->sa_family) { + case AF_INET: + return ntohs(((struct sockaddr_in *)sap)->sin_port); + case AF_INET6: + return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); + } + return 0; +} + +static inline void rpc_set_port(struct sockaddr *sap, + const unsigned short port) +{ + switch (sap->sa_family) { + case AF_INET: + ((struct sockaddr_in *)sap)->sin_port = htons(port); + break; + case AF_INET6: + ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); + break; + } +} + +#define IPV6_SCOPE_DELIMITER '%' +#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") + +static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; + const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; + + return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; +} + +static inline bool __rpc_copy_addr4(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in *ssin = (struct sockaddr_in *) src; + struct sockaddr_in *dsin = (struct sockaddr_in *) dst; + + dsin->sin_family = ssin->sin_family; + dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; + return true; +} + +#if IS_ENABLED(CONFIG_IPV6) +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; + const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; + + if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) + return false; + else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) + return sin1->sin6_scope_id == sin2->sin6_scope_id; + + return true; +} + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; + struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; + + dsin6->sin6_family = ssin6->sin6_family; + dsin6->sin6_addr = ssin6->sin6_addr; + dsin6->sin6_scope_id = ssin6->sin6_scope_id; + return true; +} +#else /* !(IS_ENABLED(CONFIG_IPV6) */ +static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + return false; +} + +static inline bool __rpc_copy_addr6(struct sockaddr *dst, + const struct sockaddr *src) +{ + return false; +} +#endif /* !(IS_ENABLED(CONFIG_IPV6) */ + +/** + * rpc_cmp_addr - compare the address portion of two sockaddrs. + * @sap1: first sockaddr + * @sap2: second sockaddr + * + * Just compares the family and address portion. Ignores port, but + * compares the scope if it's a link-local address. + * + * Returns true if the addrs are equal, false if they aren't. + */ +static inline bool rpc_cmp_addr(const struct sockaddr *sap1, + const struct sockaddr *sap2) +{ + if (sap1->sa_family == sap2->sa_family) { + switch (sap1->sa_family) { + case AF_INET: + return __rpc_cmp_addr4(sap1, sap2); + case AF_INET6: + return __rpc_cmp_addr6(sap1, sap2); + } + } + return false; +} + +/** + * rpc_copy_addr - copy the address portion of one sockaddr to another + * @dst: destination sockaddr + * @src: source sockaddr + * + * Just copies the address portion and family. Ignores port, scope, etc. + * Caller is responsible for making certain that dst is large enough to hold + * the address in src. Returns true if address family is supported. Returns + * false otherwise. + */ +static inline bool rpc_copy_addr(struct sockaddr *dst, + const struct sockaddr *src) +{ + switch (src->sa_family) { + case AF_INET: + return __rpc_copy_addr4(dst, src); + case AF_INET6: + return __rpc_copy_addr6(dst, src); + } + return false; +} + +/** + * rpc_get_scope_id - return scopeid for a given sockaddr + * @sa: sockaddr to get scopeid from + * + * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if + * not an AF_INET6 address. + */ +static inline u32 rpc_get_scope_id(const struct sockaddr *sa) +{ + if (sa->sa_family != AF_INET6) + return 0; + + return ((struct sockaddr_in6 *) sa)->sin6_scope_id; +} + +#endif /* _LINUX_SUNRPC_ADDR_H */ diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 5dc9ee4d616..303399b1ba5 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -83,6 +83,10 @@ struct cache_detail { int (*cache_upcall)(struct cache_detail *, struct cache_head *); + void (*cache_request)(struct cache_detail *cd, + struct cache_head *ch, + char **bpp, int *blen); + int (*cache_parse)(struct cache_detail *, char *buf, int len); @@ -157,11 +161,7 @@ sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); extern int -sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)); +sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h); extern void cache_clean_deferred(void *owner); diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 34206b84d8d..4a4abde000c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -165,157 +165,5 @@ size_t rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t); const char *rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t); int rpc_localaddr(struct rpc_clnt *, struct sockaddr *, size_t); -size_t rpc_ntop(const struct sockaddr *, char *, const size_t); -size_t rpc_pton(struct net *, const char *, const size_t, - struct sockaddr *, const size_t); -char * rpc_sockaddr2uaddr(const struct sockaddr *, gfp_t); -size_t rpc_uaddr2sockaddr(struct net *, const char *, const size_t, - struct sockaddr *, const size_t); - -static inline unsigned short rpc_get_port(const struct sockaddr *sap) -{ - switch (sap->sa_family) { - case AF_INET: - return ntohs(((struct sockaddr_in *)sap)->sin_port); - case AF_INET6: - return ntohs(((struct sockaddr_in6 *)sap)->sin6_port); - } - return 0; -} - -static inline void rpc_set_port(struct sockaddr *sap, - const unsigned short port) -{ - switch (sap->sa_family) { - case AF_INET: - ((struct sockaddr_in *)sap)->sin_port = htons(port); - break; - case AF_INET6: - ((struct sockaddr_in6 *)sap)->sin6_port = htons(port); - break; - } -} - -#define IPV6_SCOPE_DELIMITER '%' -#define IPV6_SCOPE_ID_LEN sizeof("%nnnnnnnnnn") - -static inline bool __rpc_cmp_addr4(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in *sin1 = (const struct sockaddr_in *)sap1; - const struct sockaddr_in *sin2 = (const struct sockaddr_in *)sap2; - - return sin1->sin_addr.s_addr == sin2->sin_addr.s_addr; -} - -static inline bool __rpc_copy_addr4(struct sockaddr *dst, - const struct sockaddr *src) -{ - const struct sockaddr_in *ssin = (struct sockaddr_in *) src; - struct sockaddr_in *dsin = (struct sockaddr_in *) dst; - - dsin->sin_family = ssin->sin_family; - dsin->sin_addr.s_addr = ssin->sin_addr.s_addr; - return true; -} - -#if IS_ENABLED(CONFIG_IPV6) -static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - const struct sockaddr_in6 *sin1 = (const struct sockaddr_in6 *)sap1; - const struct sockaddr_in6 *sin2 = (const struct sockaddr_in6 *)sap2; - - if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr)) - return false; - else if (ipv6_addr_type(&sin1->sin6_addr) & IPV6_ADDR_LINKLOCAL) - return sin1->sin6_scope_id == sin2->sin6_scope_id; - - return true; -} - -static inline bool __rpc_copy_addr6(struct sockaddr *dst, - const struct sockaddr *src) -{ - const struct sockaddr_in6 *ssin6 = (const struct sockaddr_in6 *) src; - struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst; - - dsin6->sin6_family = ssin6->sin6_family; - dsin6->sin6_addr = ssin6->sin6_addr; - return true; -} -#else /* !(IS_ENABLED(CONFIG_IPV6) */ -static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - return false; -} - -static inline bool __rpc_copy_addr6(struct sockaddr *dst, - const struct sockaddr *src) -{ - return false; -} -#endif /* !(IS_ENABLED(CONFIG_IPV6) */ - -/** - * rpc_cmp_addr - compare the address portion of two sockaddrs. - * @sap1: first sockaddr - * @sap2: second sockaddr - * - * Just compares the family and address portion. Ignores port, scope, etc. - * Returns true if the addrs are equal, false if they aren't. - */ -static inline bool rpc_cmp_addr(const struct sockaddr *sap1, - const struct sockaddr *sap2) -{ - if (sap1->sa_family == sap2->sa_family) { - switch (sap1->sa_family) { - case AF_INET: - return __rpc_cmp_addr4(sap1, sap2); - case AF_INET6: - return __rpc_cmp_addr6(sap1, sap2); - } - } - return false; -} - -/** - * rpc_copy_addr - copy the address portion of one sockaddr to another - * @dst: destination sockaddr - * @src: source sockaddr - * - * Just copies the address portion and family. Ignores port, scope, etc. - * Caller is responsible for making certain that dst is large enough to hold - * the address in src. Returns true if address family is supported. Returns - * false otherwise. - */ -static inline bool rpc_copy_addr(struct sockaddr *dst, - const struct sockaddr *src) -{ - switch (src->sa_family) { - case AF_INET: - return __rpc_copy_addr4(dst, src); - case AF_INET6: - return __rpc_copy_addr6(dst, src); - } - return false; -} - -/** - * rpc_get_scope_id - return scopeid for a given sockaddr - * @sa: sockaddr to get scopeid from - * - * Returns the value of the sin6_scope_id for AF_INET6 addrs, or 0 if - * not an AF_INET6 address. - */ -static inline u32 rpc_get_scope_id(const struct sockaddr *sa) -{ - if (sa->sa_family != AF_INET6) - return 0; - - return ((struct sockaddr_in6 *) sa)->sin6_scope_id; -} - #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_CLNT_H */ diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 676ddf53b3e..1f0216b9a6c 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -50,6 +50,7 @@ struct svc_pool { unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ struct svc_pool_stats sp_stats; /* statistics on pool operation */ + int sp_task_pending;/* has pending task */ } ____cacheline_aligned_in_smp; /* diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 63988990bd3..15f9204ee70 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -56,7 +56,7 @@ struct xdr_buf { struct kvec head[1], /* RPC header + non-page data */ tail[1]; /* Appended after page data */ - struct page ** pages; /* Array of contiguous pages */ + struct page ** pages; /* Array of pages */ unsigned int page_base, /* Start of page data */ page_len, /* Length of page data */ flags; /* Flags for data disposition */ @@ -152,6 +152,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); diff --git a/include/linux/thermal.h b/include/linux/thermal.h index fe82022478e..f0bd7f90a90 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -74,6 +74,8 @@ enum thermal_trend { THERMAL_TREND_STABLE, /* temperature is stable */ THERMAL_TREND_RAISING, /* temperature is raising */ THERMAL_TREND_DROPPING, /* temperature is dropping */ + THERMAL_TREND_RAISE_FULL, /* apply highest cooling action */ + THERMAL_TREND_DROP_FULL, /* apply lowest cooling action */ }; /* Events supported by Thermal Netlink */ @@ -121,6 +123,7 @@ struct thermal_zone_device_ops { int (*set_trip_hyst) (struct thermal_zone_device *, int, unsigned long); int (*get_crit_temp) (struct thermal_zone_device *, unsigned long *); + int (*set_emul_temp) (struct thermal_zone_device *, unsigned long); int (*get_trend) (struct thermal_zone_device *, int, enum thermal_trend *); int (*notify) (struct thermal_zone_device *, int, @@ -163,6 +166,7 @@ struct thermal_zone_device { int polling_delay; int temperature; int last_temperature; + int emul_temperature; int passive; unsigned int forced_passive; const struct thermal_zone_device_ops *ops; @@ -244,9 +248,11 @@ int thermal_register_governor(struct thermal_governor *); void thermal_unregister_governor(struct thermal_governor *); #ifdef CONFIG_NET -extern int thermal_generate_netlink_event(u32 orig, enum events event); +extern int thermal_generate_netlink_event(struct thermal_zone_device *tz, + enum events event); #else -static inline int thermal_generate_netlink_event(u32 orig, enum events event) +static int thermal_generate_netlink_event(struct thermal_zone_device *tz, + enum events event) { return 0; } diff --git a/include/linux/writeback.h b/include/linux/writeback.h index b82a83aba31..9a9367c0c07 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -87,9 +87,9 @@ int inode_wait(void *); void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); -int writeback_inodes_sb_if_idle(struct super_block *, enum wb_reason reason); -int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr, - enum wb_reason reason); +int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); +int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, + enum wb_reason reason); void sync_inodes_sb(struct super_block *); long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, enum wb_reason reason); diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 260470e7248..21cdb0b7b0f 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -78,9 +78,7 @@ TRACE_EVENT(mc_event, TP_printk("%d %s error%s:%s%s on %s (mc:%d location:%d:%d:%d address:0x%08lx grain:%d syndrome:0x%08lx%s%s)", __entry->error_count, - (__entry->error_type == HW_EVENT_ERR_CORRECTED) ? "Corrected" : - ((__entry->error_type == HW_EVENT_ERR_FATAL) ? - "Fatal" : "Uncorrected"), + mc_event_error_type(__entry->error_type), __entry->error_count > 1 ? "s" : "", ((char *)__get_str(msg))[0] ? " " : "", __get_str(msg), diff --git a/include/sound/aess.h b/include/sound/aess.h new file mode 100644 index 00000000000..cee0d09fadb --- /dev/null +++ b/include/sound/aess.h @@ -0,0 +1,53 @@ +/* + * AESS IP block reset + * + * Copyright (C) 2012 Texas Instruments, Inc. + * Paul Walmsley + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ +#ifndef __SOUND_AESS_H__ +#define __SOUND_AESS_H__ + +#include <linux/kernel.h> +#include <linux/io.h> + +/* + * AESS_AUTO_GATING_ENABLE_OFFSET: offset in bytes of the AESS IP + * block's AESS_AUTO_GATING_ENABLE__1 register from the IP block's + * base address + */ +#define AESS_AUTO_GATING_ENABLE_OFFSET 0x07c + +/* Register bitfields in the AESS_AUTO_GATING_ENABLE__1 register */ +#define AESS_AUTO_GATING_ENABLE_SHIFT 0 + +/** + * aess_enable_autogating - enable AESS internal autogating + * @oh: struct omap_hwmod * + * + * Enable internal autogating on the AESS. This allows the AESS to + * indicate that it is idle to the OMAP PRCM. Returns 0. + */ +static inline void aess_enable_autogating(void __iomem *base) +{ + u32 v; + + /* Set AESS_AUTO_GATING_ENABLE__1.ENABLE to allow idle entry */ + v = 1 << AESS_AUTO_GATING_ENABLE_SHIFT; + writel(v, base + AESS_AUTO_GATING_ENABLE_OFFSET); +} + +#endif /* __SOUND_AESS_H__ */ diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 05c5e61f0a7..9961726523d 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -6,10 +6,61 @@ #include <linux/blktrace_api.h> #include <linux/blkdev.h> +#include <linux/buffer_head.h> #include <linux/tracepoint.h> #define RWBS_LEN 8 +DECLARE_EVENT_CLASS(block_buffer, + + TP_PROTO(struct buffer_head *bh), + + TP_ARGS(bh), + + TP_STRUCT__entry ( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( size_t, size ) + ), + + TP_fast_assign( + __entry->dev = bh->b_bdev->bd_dev; + __entry->sector = bh->b_blocknr; + __entry->size = bh->b_size; + ), + + TP_printk("%d,%d sector=%llu size=%zu", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->sector, __entry->size + ) +); + +/** + * block_touch_buffer - mark a buffer accessed + * @bh: buffer_head being touched + * + * Called from touch_buffer(). + */ +DEFINE_EVENT(block_buffer, block_touch_buffer, + + TP_PROTO(struct buffer_head *bh), + + TP_ARGS(bh) +); + +/** + * block_dirty_buffer - mark a buffer dirty + * @bh: buffer_head being dirtied + * + * Called from mark_buffer_dirty(). + */ +DEFINE_EVENT(block_buffer, block_dirty_buffer, + + TP_PROTO(struct buffer_head *bh), + + TP_ARGS(bh) +); + DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), @@ -206,7 +257,6 @@ TRACE_EVENT(block_bio_bounce, /** * block_bio_complete - completed all work on the block operation - * @q: queue holding the block operation * @bio: block operation completed * @error: io error value * @@ -215,9 +265,9 @@ TRACE_EVENT(block_bio_bounce, */ TRACE_EVENT(block_bio_complete, - TP_PROTO(struct request_queue *q, struct bio *bio, int error), + TP_PROTO(struct bio *bio, int error), - TP_ARGS(q, bio, error), + TP_ARGS(bio, error), TP_STRUCT__entry( __field( dev_t, dev ) @@ -228,7 +278,8 @@ TRACE_EVENT(block_bio_complete, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio->bi_bdev ? + bio->bi_bdev->bd_dev : 0; __entry->sector = bio->bi_sector; __entry->nr_sector = bio->bi_size >> 9; __entry->error = error; @@ -241,11 +292,11 @@ TRACE_EVENT(block_bio_complete, __entry->nr_sector, __entry->error) ); -DECLARE_EVENT_CLASS(block_bio, +DECLARE_EVENT_CLASS(block_bio_merge, - TP_PROTO(struct request_queue *q, struct bio *bio), + TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - TP_ARGS(q, bio), + TP_ARGS(q, rq, bio), TP_STRUCT__entry( __field( dev_t, dev ) @@ -272,31 +323,33 @@ DECLARE_EVENT_CLASS(block_bio, /** * block_bio_backmerge - merging block operation to the end of an existing operation * @q: queue holding operation + * @rq: request bio is being merged into * @bio: new block operation to merge * * Merging block request @bio to the end of an existing block request * in queue @q. */ -DEFINE_EVENT(block_bio, block_bio_backmerge, +DEFINE_EVENT(block_bio_merge, block_bio_backmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), + TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - TP_ARGS(q, bio) + TP_ARGS(q, rq, bio) ); /** * block_bio_frontmerge - merging block operation to the beginning of an existing operation * @q: queue holding operation + * @rq: request bio is being merged into * @bio: new block operation to merge * * Merging block IO operation @bio to the beginning of an existing block * operation in queue @q. */ -DEFINE_EVENT(block_bio, block_bio_frontmerge, +DEFINE_EVENT(block_bio_merge, block_bio_frontmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), + TP_PROTO(struct request_queue *q, struct request *rq, struct bio *bio), - TP_ARGS(q, bio) + TP_ARGS(q, rq, bio) ); /** @@ -306,11 +359,32 @@ DEFINE_EVENT(block_bio, block_bio_frontmerge, * * About to place the block IO operation @bio into queue @q. */ -DEFINE_EVENT(block_bio, block_bio_queue, +TRACE_EVENT(block_bio_queue, TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio) + TP_ARGS(q, bio), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( sector_t, sector ) + __field( unsigned int, nr_sector ) + __array( char, rwbs, RWBS_LEN ) + __array( char, comm, TASK_COMM_LEN ) + ), + + TP_fast_assign( + __entry->dev = bio->bi_bdev->bd_dev; + __entry->sector = bio->bi_sector; + __entry->nr_sector = bio->bi_size >> 9; + blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); + memcpy(__entry->comm, current->comm, TASK_COMM_LEN); + ), + + TP_printk("%d,%d %s %llu + %u [%s]", + MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs, + (unsigned long long)__entry->sector, + __entry->nr_sector, __entry->comm) ); DECLARE_EVENT_CLASS(block_get_rq, diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index b453d92c225..6a16fd2e70e 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -32,6 +32,115 @@ struct wb_writeback_work; +TRACE_EVENT(writeback_dirty_page, + + TP_PROTO(struct page *page, struct address_space *mapping), + + TP_ARGS(page, mapping), + + TP_STRUCT__entry ( + __array(char, name, 32) + __field(unsigned long, ino) + __field(pgoff_t, index) + ), + + TP_fast_assign( + strncpy(__entry->name, + mapping ? dev_name(mapping->backing_dev_info->dev) : "(unknown)", 32); + __entry->ino = mapping ? mapping->host->i_ino : 0; + __entry->index = page->index; + ), + + TP_printk("bdi %s: ino=%lu index=%lu", + __entry->name, + __entry->ino, + __entry->index + ) +); + +DECLARE_EVENT_CLASS(writeback_dirty_inode_template, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags), + + TP_STRUCT__entry ( + __array(char, name, 32) + __field(unsigned long, ino) + __field(unsigned long, flags) + ), + + TP_fast_assign( + struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info; + + /* may be called for files on pseudo FSes w/ unregistered bdi */ + strncpy(__entry->name, + bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); + __entry->ino = inode->i_ino; + __entry->flags = flags; + ), + + TP_printk("bdi %s: ino=%lu flags=%s", + __entry->name, + __entry->ino, + show_inode_state(__entry->flags) + ) +); + +DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags) +); + +DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags) +); + +DECLARE_EVENT_CLASS(writeback_write_inode_template, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc), + + TP_STRUCT__entry ( + __array(char, name, 32) + __field(unsigned long, ino) + __field(int, sync_mode) + ), + + TP_fast_assign( + strncpy(__entry->name, + dev_name(inode->i_mapping->backing_dev_info->dev), 32); + __entry->ino = inode->i_ino; + __entry->sync_mode = wbc->sync_mode; + ), + + TP_printk("bdi %s: ino=%lu sync_mode=%d", + __entry->name, + __entry->ino, + __entry->sync_mode + ) +); + +DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode_start, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc) +); + +DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode, + + TP_PROTO(struct inode *inode, struct writeback_control *wbc), + + TP_ARGS(inode, wbc) +); + DECLARE_EVENT_CLASS(writeback_work_class, TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), TP_ARGS(bdi, work), @@ -479,6 +588,13 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template, ) ); +DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode_start, + TP_PROTO(struct inode *inode, + struct writeback_control *wbc, + unsigned long nr_to_write), + TP_ARGS(inode, wbc, nr_to_write) +); + DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, TP_PROTO(struct inode *inode, struct writeback_control *wbc, diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 12af4270c9c..7f12624a393 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3258,7 +3258,8 @@ void complete_all(struct completion *x) EXPORT_SYMBOL(complete_all); static inline long __sched -do_wait_for_common(struct completion *x, long timeout, int state) +do_wait_for_common(struct completion *x, + long (*action)(long), long timeout, int state) { if (!x->done) { DECLARE_WAITQUEUE(wait, current); @@ -3271,7 +3272,7 @@ do_wait_for_common(struct completion *x, long timeout, int state) } __set_current_state(state); spin_unlock_irq(&x->wait.lock); - timeout = schedule_timeout(timeout); + timeout = action(timeout); spin_lock_irq(&x->wait.lock); } while (!x->done && timeout); __remove_wait_queue(&x->wait, &wait); @@ -3282,17 +3283,30 @@ do_wait_for_common(struct completion *x, long timeout, int state) return timeout ?: 1; } -static long __sched -wait_for_common(struct completion *x, long timeout, int state) +static inline long __sched +__wait_for_common(struct completion *x, + long (*action)(long), long timeout, int state) { might_sleep(); spin_lock_irq(&x->wait.lock); - timeout = do_wait_for_common(x, timeout, state); + timeout = do_wait_for_common(x, action, timeout, state); spin_unlock_irq(&x->wait.lock); return timeout; } +static long __sched +wait_for_common(struct completion *x, long timeout, int state) +{ + return __wait_for_common(x, schedule_timeout, timeout, state); +} + +static long __sched +wait_for_common_io(struct completion *x, long timeout, int state) +{ + return __wait_for_common(x, io_schedule_timeout, timeout, state); +} + /** * wait_for_completion: - waits for completion of a task * @x: holds the state of this particular completion @@ -3329,6 +3343,39 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout) EXPORT_SYMBOL(wait_for_completion_timeout); /** + * wait_for_completion_io: - waits for completion of a task + * @x: holds the state of this particular completion + * + * This waits to be signaled for completion of a specific task. It is NOT + * interruptible and there is no timeout. The caller is accounted as waiting + * for IO. + */ +void __sched wait_for_completion_io(struct completion *x) +{ + wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(wait_for_completion_io); + +/** + * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout) + * @x: holds the state of this particular completion + * @timeout: timeout value in jiffies + * + * This waits for either a completion of a specific task to be signaled or for a + * specified timeout to expire. The timeout is in jiffies. It is not + * interruptible. The caller is accounted as waiting for IO. + * + * The return value is 0 if timed out, and positive (at least 1, or number of + * jiffies left till timeout) if completed. + */ +unsigned long __sched +wait_for_completion_io_timeout(struct completion *x, unsigned long timeout) +{ + return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE); +} +EXPORT_SYMBOL(wait_for_completion_io_timeout); + +/** * wait_for_completion_interruptible: - waits for completion of a task (w/intr) * @x: holds the state of this particular completion * diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 314b9ee07ed..a19a39952c1 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -554,6 +554,7 @@ void tick_nohz_idle_enter(void) local_irq_enable(); } +EXPORT_SYMBOL_GPL(tick_nohz_idle_enter); /** * tick_nohz_irq_exit - update next tick event from interrupt exit @@ -685,6 +686,7 @@ void tick_nohz_idle_exit(void) local_irq_enable(); } +EXPORT_SYMBOL_GPL(tick_nohz_idle_exit); static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now) { diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 71259e2b6b6..9e5b8c272ee 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -739,6 +739,12 @@ static void blk_add_trace_rq_complete(void *ignore, struct request_queue *q, struct request *rq) { + struct blk_trace *bt = q->blk_trace; + + /* if control ever passes through here, it's a request based driver */ + if (unlikely(bt && !bt->rq_based)) + bt->rq_based = true; + blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); } @@ -774,15 +780,30 @@ static void blk_add_trace_bio_bounce(void *ignore, blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); } -static void blk_add_trace_bio_complete(void *ignore, - struct request_queue *q, struct bio *bio, - int error) +static void blk_add_trace_bio_complete(void *ignore, struct bio *bio, int error) { + struct request_queue *q; + struct blk_trace *bt; + + if (!bio->bi_bdev) + return; + + q = bdev_get_queue(bio->bi_bdev); + bt = q->blk_trace; + + /* + * Request based drivers will generate both rq and bio completions. + * Ignore bio ones. + */ + if (likely(!bt) || bt->rq_based) + return; + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); } static void blk_add_trace_bio_backmerge(void *ignore, struct request_queue *q, + struct request *rq, struct bio *bio) { blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); @@ -790,6 +811,7 @@ static void blk_add_trace_bio_backmerge(void *ignore, static void blk_add_trace_bio_frontmerge(void *ignore, struct request_queue *q, + struct request *rq, struct bio *bio) { blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index cdc377c456c..efe68148f62 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -696,7 +696,7 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi, * => fast response on large errors; small oscillation near setpoint */ setpoint = (freerun + limit) / 2; - x = div_s64((setpoint - dirty) << RATELIMIT_CALC_SHIFT, + x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT, limit - setpoint + 1); pos_ratio = x; pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT; @@ -1986,6 +1986,8 @@ int __set_page_dirty_no_writeback(struct page *page) */ void account_page_dirtied(struct page *page, struct address_space *mapping) { + trace_writeback_dirty_page(page, mapping); + if (mapping_cap_account_dirty(mapping)) { __inc_zone_page_state(page, NR_FILE_DIRTY); __inc_zone_page_state(page, NR_DIRTIED); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 1deb29af82f..e65e6e4be38 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -28,6 +28,22 @@ #include "crypto.h" +/* + * Module compatibility interface. For now it doesn't do anything, + * but its existence signals a certain level of functionality. + * + * The data buffer is used to pass information both to and from + * libceph. The return value indicates whether libceph determines + * it is compatible with the caller (from another kernel module), + * given the provided data. + * + * The data pointer can be null. + */ +bool libceph_compatible(void *data) +{ + return true; +} +EXPORT_SYMBOL(libceph_compatible); /* * find filename portion of a path (/foo/bar/baz -> baz) @@ -590,10 +606,8 @@ static int __init init_ceph_lib(void) if (ret < 0) goto out_crypto; - pr_info("loaded (mon/osd proto %d/%d, osdmap %d/%d %d/%d)\n", - CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL, - CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, - CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); + pr_info("loaded (mon/osd proto %d/%d)\n", + CEPH_MONC_PROTOCOL, CEPH_OSDC_PROTOCOL); return 0; diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c index 3fbda04de29..1348df96fe1 100644 --- a/net/ceph/ceph_strings.c +++ b/net/ceph/ceph_strings.c @@ -21,9 +21,15 @@ const char *ceph_osd_op_name(int op) switch (op) { case CEPH_OSD_OP_READ: return "read"; case CEPH_OSD_OP_STAT: return "stat"; + case CEPH_OSD_OP_MAPEXT: return "mapext"; + case CEPH_OSD_OP_SPARSE_READ: return "sparse-read"; + case CEPH_OSD_OP_NOTIFY: return "notify"; + case CEPH_OSD_OP_NOTIFY_ACK: return "notify-ack"; + case CEPH_OSD_OP_ASSERT_VER: return "assert-version"; case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; + case CEPH_OSD_OP_CREATE: return "create"; case CEPH_OSD_OP_WRITE: return "write"; case CEPH_OSD_OP_DELETE: return "delete"; case CEPH_OSD_OP_TRUNCATE: return "truncate"; @@ -39,6 +45,11 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_TMAPUP: return "tmapup"; case CEPH_OSD_OP_TMAPGET: return "tmapget"; case CEPH_OSD_OP_TMAPPUT: return "tmapput"; + case CEPH_OSD_OP_WATCH: return "watch"; + + case CEPH_OSD_OP_CLONERANGE: return "clonerange"; + case CEPH_OSD_OP_ASSERT_SRC_VERSION: return "assert-src-version"; + case CEPH_OSD_OP_SRC_CMPXATTR: return "src-cmpxattr"; case CEPH_OSD_OP_GETXATTR: return "getxattr"; case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; @@ -53,6 +64,10 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; case CEPH_OSD_OP_SCRUB: return "scrub"; + case CEPH_OSD_OP_SCRUB_RESERVE: return "scrub-reserve"; + case CEPH_OSD_OP_SCRUB_UNRESERVE: return "scrub-unreserve"; + case CEPH_OSD_OP_SCRUB_STOP: return "scrub-stop"; + case CEPH_OSD_OP_SCRUB_MAP: return "scrub-map"; case CEPH_OSD_OP_WRLOCK: return "wrlock"; case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; @@ -64,10 +79,34 @@ const char *ceph_osd_op_name(int op) case CEPH_OSD_OP_CALL: return "call"; case CEPH_OSD_OP_PGLS: return "pgls"; + case CEPH_OSD_OP_PGLS_FILTER: return "pgls-filter"; + case CEPH_OSD_OP_OMAPGETKEYS: return "omap-get-keys"; + case CEPH_OSD_OP_OMAPGETVALS: return "omap-get-vals"; + case CEPH_OSD_OP_OMAPGETHEADER: return "omap-get-header"; + case CEPH_OSD_OP_OMAPGETVALSBYKEYS: return "omap-get-vals-by-keys"; + case CEPH_OSD_OP_OMAPSETVALS: return "omap-set-vals"; + case CEPH_OSD_OP_OMAPSETHEADER: return "omap-set-header"; + case CEPH_OSD_OP_OMAPCLEAR: return "omap-clear"; + case CEPH_OSD_OP_OMAPRMKEYS: return "omap-rm-keys"; } return "???"; } +const char *ceph_osd_state_name(int s) +{ + switch (s) { + case CEPH_OSD_EXISTS: + return "exists"; + case CEPH_OSD_UP: + return "up"; + case CEPH_OSD_AUTOOUT: + return "autoout"; + case CEPH_OSD_NEW: + return "new"; + default: + return "???"; + } +} const char *ceph_pool_op_name(int op) { diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 35fce755ce1..cbd06a91941 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -287,6 +287,7 @@ static int is_out(const struct crush_map *map, const __u32 *weight, int item, in * @outpos: our position in that vector * @firstn: true if choosing "first n" items, false if choosing "indep" * @recurse_to_leaf: true if we want one device under each item of given type + * @descend_once: true if we should only try one descent before giving up * @out2: second output vector for leaf items (if @recurse_to_leaf) */ static int crush_choose(const struct crush_map *map, @@ -295,7 +296,7 @@ static int crush_choose(const struct crush_map *map, int x, int numrep, int type, int *out, int outpos, int firstn, int recurse_to_leaf, - int *out2) + int descend_once, int *out2) { int rep; unsigned int ftotal, flocal; @@ -391,7 +392,7 @@ static int crush_choose(const struct crush_map *map, } reject = 0; - if (recurse_to_leaf) { + if (!collide && recurse_to_leaf) { if (item < 0) { if (crush_choose(map, map->buckets[-1-item], @@ -399,6 +400,7 @@ static int crush_choose(const struct crush_map *map, x, outpos+1, 0, out2, outpos, firstn, 0, + map->chooseleaf_descend_once, NULL) <= outpos) /* didn't get leaf */ reject = 1; @@ -422,7 +424,10 @@ reject: ftotal++; flocal++; - if (collide && flocal <= map->choose_local_tries) + if (reject && descend_once) + /* let outer call try again */ + skip_rep = 1; + else if (collide && flocal <= map->choose_local_tries) /* retry locally a few times */ retry_bucket = 1; else if (map->choose_local_fallback_tries > 0 && @@ -485,6 +490,7 @@ int crush_do_rule(const struct crush_map *map, int i, j; int numrep; int firstn; + const int descend_once = 0; if ((__u32)ruleno >= map->max_rules) { dprintk(" bad ruleno %d\n", ruleno); @@ -544,7 +550,8 @@ int crush_do_rule(const struct crush_map *map, curstep->arg2, o+osize, j, firstn, - recurse_to_leaf, c+osize); + recurse_to_leaf, + descend_once, c+osize); } if (recurse_to_leaf) diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index af14cb42516..6e7a236525b 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -423,7 +423,8 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, } } -int ceph_key_instantiate(struct key *key, struct key_preparsed_payload *prep) +static int ceph_key_instantiate(struct key *key, + struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey; size_t datalen = prep->datalen; @@ -458,12 +459,12 @@ err: return ret; } -int ceph_key_match(const struct key *key, const void *description) +static int ceph_key_match(const struct key *key, const void *description) { return strcmp(key->description, description) == 0; } -void ceph_key_destroy(struct key *key) { +static void ceph_key_destroy(struct key *key) { struct ceph_crypto_key *ckey = key->payload.data; ceph_crypto_key_destroy(ckey); diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 38b5dc1823d..00d051f4894 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -66,9 +66,9 @@ static int osdmap_show(struct seq_file *s, void *p) for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { struct ceph_pg_pool_info *pool = rb_entry(n, struct ceph_pg_pool_info, node); - seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", - pool->id, pool->v.pg_num, pool->pg_num_mask, - pool->v.lpg_num, pool->lpg_num_mask); + seq_printf(s, "pg_pool %llu pg_num %d / %d\n", + (unsigned long long)pool->id, pool->pg_num, + pool->pg_num_mask); } for (i = 0; i < client->osdc.osdmap->max_osd; i++) { struct ceph_entity_addr *addr = @@ -123,26 +123,16 @@ static int osdc_show(struct seq_file *s, void *pp) mutex_lock(&osdc->request_mutex); for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { struct ceph_osd_request *req; - struct ceph_osd_request_head *head; - struct ceph_osd_op *op; - int num_ops; - int opcode, olen; + int opcode; int i; req = rb_entry(p, struct ceph_osd_request, r_node); - seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, + seq_printf(s, "%lld\tosd%d\t%lld.%x\t", req->r_tid, req->r_osd ? req->r_osd->o_osd : -1, - le32_to_cpu(req->r_pgid.pool), - le16_to_cpu(req->r_pgid.ps)); + req->r_pgid.pool, req->r_pgid.seed); - head = req->r_request->front.iov_base; - op = (void *)(head + 1); - - num_ops = le16_to_cpu(head->num_ops); - olen = le32_to_cpu(head->object_len); - seq_printf(s, "%.*s", olen, - (const char *)(head->ops + num_ops)); + seq_printf(s, "%.*s", req->r_oid_len, req->r_oid); if (req->r_reassert_version.epoch) seq_printf(s, "\t%u'%llu", @@ -151,10 +141,9 @@ static int osdc_show(struct seq_file *s, void *pp) else seq_printf(s, "\t"); - for (i = 0; i < num_ops; i++) { - opcode = le16_to_cpu(op->op); + for (i = 0; i < req->r_num_ops; i++) { + opcode = le16_to_cpu(req->r_request_ops[i].op); seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); - op++; } seq_printf(s, "\n"); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5ccf87ed8d6..2c0669fb54e 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -9,8 +9,9 @@ #include <linux/slab.h> #include <linux/socket.h> #include <linux/string.h> +#ifdef CONFIG_BLOCK #include <linux/bio.h> -#include <linux/blkdev.h> +#endif /* CONFIG_BLOCK */ #include <linux/dns_resolver.h> #include <net/tcp.h> @@ -97,6 +98,57 @@ #define CON_FLAG_SOCK_CLOSED 3 /* socket state changed to closed */ #define CON_FLAG_BACKOFF 4 /* need to retry queuing delayed work */ +static bool con_flag_valid(unsigned long con_flag) +{ + switch (con_flag) { + case CON_FLAG_LOSSYTX: + case CON_FLAG_KEEPALIVE_PENDING: + case CON_FLAG_WRITE_PENDING: + case CON_FLAG_SOCK_CLOSED: + case CON_FLAG_BACKOFF: + return true; + default: + return false; + } +} + +static void con_flag_clear(struct ceph_connection *con, unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + clear_bit(con_flag, &con->flags); +} + +static void con_flag_set(struct ceph_connection *con, unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + set_bit(con_flag, &con->flags); +} + +static bool con_flag_test(struct ceph_connection *con, unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + return test_bit(con_flag, &con->flags); +} + +static bool con_flag_test_and_clear(struct ceph_connection *con, + unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + return test_and_clear_bit(con_flag, &con->flags); +} + +static bool con_flag_test_and_set(struct ceph_connection *con, + unsigned long con_flag) +{ + BUG_ON(!con_flag_valid(con_flag)); + + return test_and_set_bit(con_flag, &con->flags); +} + /* static tag bytes (protocol control messages) */ static char tag_msg = CEPH_MSGR_TAG_MSG; static char tag_ack = CEPH_MSGR_TAG_ACK; @@ -114,7 +166,7 @@ static struct lock_class_key socket_class; static void queue_con(struct ceph_connection *con); static void con_work(struct work_struct *); -static void ceph_fault(struct ceph_connection *con); +static void con_fault(struct ceph_connection *con); /* * Nicely render a sockaddr as a string. An array of formatted @@ -171,7 +223,7 @@ static void encode_my_addr(struct ceph_messenger *msgr) */ static struct workqueue_struct *ceph_msgr_wq; -void _ceph_msgr_exit(void) +static void _ceph_msgr_exit(void) { if (ceph_msgr_wq) { destroy_workqueue(ceph_msgr_wq); @@ -308,7 +360,7 @@ static void ceph_sock_write_space(struct sock *sk) * buffer. See net/ipv4/tcp_input.c:tcp_check_space() * and net/core/stream.c:sk_stream_write_space(). */ - if (test_bit(CON_FLAG_WRITE_PENDING, &con->flags)) { + if (con_flag_test(con, CON_FLAG_WRITE_PENDING)) { if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { dout("%s %p queueing write work\n", __func__, con); clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); @@ -333,7 +385,7 @@ static void ceph_sock_state_change(struct sock *sk) case TCP_CLOSE_WAIT: dout("%s TCP_CLOSE_WAIT\n", __func__); con_sock_state_closing(con); - set_bit(CON_FLAG_SOCK_CLOSED, &con->flags); + con_flag_set(con, CON_FLAG_SOCK_CLOSED); queue_con(con); break; case TCP_ESTABLISHED: @@ -474,7 +526,7 @@ static int con_close_socket(struct ceph_connection *con) * received a socket close event before we had the chance to * shut the socket down. */ - clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags); + con_flag_clear(con, CON_FLAG_SOCK_CLOSED); con_sock_state_closed(con); return rc; @@ -538,11 +590,10 @@ void ceph_con_close(struct ceph_connection *con) ceph_pr_addr(&con->peer_addr.in_addr)); con->state = CON_STATE_CLOSED; - clear_bit(CON_FLAG_LOSSYTX, &con->flags); /* so we retry next connect */ - clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); - clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); - clear_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags); - clear_bit(CON_FLAG_BACKOFF, &con->flags); + con_flag_clear(con, CON_FLAG_LOSSYTX); /* so we retry next connect */ + con_flag_clear(con, CON_FLAG_KEEPALIVE_PENDING); + con_flag_clear(con, CON_FLAG_WRITE_PENDING); + con_flag_clear(con, CON_FLAG_BACKOFF); reset_connection(con); con->peer_global_seq = 0; @@ -798,7 +849,7 @@ static void prepare_write_message(struct ceph_connection *con) /* no, queue up footer too and be done */ prepare_write_message_footer(con); - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } /* @@ -819,7 +870,7 @@ static void prepare_write_ack(struct ceph_connection *con) &con->out_temp_ack); con->out_more = 1; /* more will follow.. eventually.. */ - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } /* @@ -830,7 +881,7 @@ static void prepare_write_keepalive(struct ceph_connection *con) dout("prepare_write_keepalive %p\n", con); con_out_kvec_reset(con); con_out_kvec_add(con, sizeof (tag_keepalive), &tag_keepalive); - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } /* @@ -873,7 +924,7 @@ static void prepare_write_banner(struct ceph_connection *con) &con->msgr->my_enc_addr); con->out_more = 0; - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); } static int prepare_write_connect(struct ceph_connection *con) @@ -923,7 +974,7 @@ static int prepare_write_connect(struct ceph_connection *con) auth->authorizer_buf); con->out_more = 0; - set_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_set(con, CON_FLAG_WRITE_PENDING); return 0; } @@ -1643,7 +1694,7 @@ static int process_connect(struct ceph_connection *con) le32_to_cpu(con->in_reply.connect_seq)); if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY) - set_bit(CON_FLAG_LOSSYTX, &con->flags); + con_flag_set(con, CON_FLAG_LOSSYTX); con->delay = 0; /* reset backoff memory */ @@ -2080,15 +2131,14 @@ do_next: prepare_write_ack(con); goto more; } - if (test_and_clear_bit(CON_FLAG_KEEPALIVE_PENDING, - &con->flags)) { + if (con_flag_test_and_clear(con, CON_FLAG_KEEPALIVE_PENDING)) { prepare_write_keepalive(con); goto more; } } /* Nothing to do! */ - clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_clear(con, CON_FLAG_WRITE_PENDING); dout("try_write nothing else to write.\n"); ret = 0; out: @@ -2268,7 +2318,7 @@ static void queue_con(struct ceph_connection *con) static bool con_sock_closed(struct ceph_connection *con) { - if (!test_and_clear_bit(CON_FLAG_SOCK_CLOSED, &con->flags)) + if (!con_flag_test_and_clear(con, CON_FLAG_SOCK_CLOSED)) return false; #define CASE(x) \ @@ -2295,6 +2345,41 @@ static bool con_sock_closed(struct ceph_connection *con) return true; } +static bool con_backoff(struct ceph_connection *con) +{ + int ret; + + if (!con_flag_test_and_clear(con, CON_FLAG_BACKOFF)) + return false; + + ret = queue_con_delay(con, round_jiffies_relative(con->delay)); + if (ret) { + dout("%s: con %p FAILED to back off %lu\n", __func__, + con, con->delay); + BUG_ON(ret == -ENOENT); + con_flag_set(con, CON_FLAG_BACKOFF); + } + + return true; +} + +/* Finish fault handling; con->mutex must *not* be held here */ + +static void con_fault_finish(struct ceph_connection *con) +{ + /* + * in case we faulted due to authentication, invalidate our + * current tickets so that we can get new ones. + */ + if (con->auth_retry && con->ops->invalidate_authorizer) { + dout("calling invalidate_authorizer()\n"); + con->ops->invalidate_authorizer(con); + } + + if (con->ops->fault) + con->ops->fault(con); +} + /* * Do some work on a connection. Drop a connection ref when we're done. */ @@ -2302,73 +2387,68 @@ static void con_work(struct work_struct *work) { struct ceph_connection *con = container_of(work, struct ceph_connection, work.work); - int ret; + bool fault; mutex_lock(&con->mutex); -restart: - if (con_sock_closed(con)) - goto fault; + while (true) { + int ret; - if (test_and_clear_bit(CON_FLAG_BACKOFF, &con->flags)) { - dout("con_work %p backing off\n", con); - ret = queue_con_delay(con, round_jiffies_relative(con->delay)); - if (ret) { - dout("con_work %p FAILED to back off %lu\n", con, - con->delay); - BUG_ON(ret == -ENOENT); - set_bit(CON_FLAG_BACKOFF, &con->flags); + if ((fault = con_sock_closed(con))) { + dout("%s: con %p SOCK_CLOSED\n", __func__, con); + break; + } + if (con_backoff(con)) { + dout("%s: con %p BACKOFF\n", __func__, con); + break; + } + if (con->state == CON_STATE_STANDBY) { + dout("%s: con %p STANDBY\n", __func__, con); + break; + } + if (con->state == CON_STATE_CLOSED) { + dout("%s: con %p CLOSED\n", __func__, con); + BUG_ON(con->sock); + break; + } + if (con->state == CON_STATE_PREOPEN) { + dout("%s: con %p PREOPEN\n", __func__, con); + BUG_ON(con->sock); } - goto done; - } - if (con->state == CON_STATE_STANDBY) { - dout("con_work %p STANDBY\n", con); - goto done; - } - if (con->state == CON_STATE_CLOSED) { - dout("con_work %p CLOSED\n", con); - BUG_ON(con->sock); - goto done; - } - if (con->state == CON_STATE_PREOPEN) { - dout("con_work OPENING\n"); - BUG_ON(con->sock); - } + ret = try_read(con); + if (ret < 0) { + if (ret == -EAGAIN) + continue; + con->error_msg = "socket error on read"; + fault = true; + break; + } - ret = try_read(con); - if (ret == -EAGAIN) - goto restart; - if (ret < 0) { - con->error_msg = "socket error on read"; - goto fault; - } + ret = try_write(con); + if (ret < 0) { + if (ret == -EAGAIN) + continue; + con->error_msg = "socket error on write"; + fault = true; + } - ret = try_write(con); - if (ret == -EAGAIN) - goto restart; - if (ret < 0) { - con->error_msg = "socket error on write"; - goto fault; + break; /* If we make it to here, we're done */ } - -done: + if (fault) + con_fault(con); mutex_unlock(&con->mutex); -done_unlocked: - con->ops->put(con); - return; -fault: - ceph_fault(con); /* error/fault path */ - goto done_unlocked; -} + if (fault) + con_fault_finish(con); + con->ops->put(con); +} /* * Generic error/fault handler. A retry mechanism is used with * exponential backoff */ -static void ceph_fault(struct ceph_connection *con) - __releases(con->mutex) +static void con_fault(struct ceph_connection *con) { pr_warning("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr), con->error_msg); @@ -2381,10 +2461,10 @@ static void ceph_fault(struct ceph_connection *con) con_close_socket(con); - if (test_bit(CON_FLAG_LOSSYTX, &con->flags)) { + if (con_flag_test(con, CON_FLAG_LOSSYTX)) { dout("fault on LOSSYTX channel, marking CLOSED\n"); con->state = CON_STATE_CLOSED; - goto out_unlock; + return; } if (con->in_msg) { @@ -2401,9 +2481,9 @@ static void ceph_fault(struct ceph_connection *con) /* If there are no messages queued or keepalive pending, place * the connection in a STANDBY state */ if (list_empty(&con->out_queue) && - !test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)) { + !con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)) { dout("fault %p setting STANDBY clearing WRITE_PENDING\n", con); - clear_bit(CON_FLAG_WRITE_PENDING, &con->flags); + con_flag_clear(con, CON_FLAG_WRITE_PENDING); con->state = CON_STATE_STANDBY; } else { /* retry after a delay. */ @@ -2412,23 +2492,9 @@ static void ceph_fault(struct ceph_connection *con) con->delay = BASE_DELAY_INTERVAL; else if (con->delay < MAX_DELAY_INTERVAL) con->delay *= 2; - set_bit(CON_FLAG_BACKOFF, &con->flags); + con_flag_set(con, CON_FLAG_BACKOFF); queue_con(con); } - -out_unlock: - mutex_unlock(&con->mutex); - /* - * in case we faulted due to authentication, invalidate our - * current tickets so that we can get new ones. - */ - if (con->auth_retry && con->ops->invalidate_authorizer) { - dout("calling invalidate_authorizer()\n"); - con->ops->invalidate_authorizer(con); - } - - if (con->ops->fault) - con->ops->fault(con); } @@ -2469,8 +2535,8 @@ static void clear_standby(struct ceph_connection *con) dout("clear_standby %p and ++connect_seq\n", con); con->state = CON_STATE_PREOPEN; con->connect_seq++; - WARN_ON(test_bit(CON_FLAG_WRITE_PENDING, &con->flags)); - WARN_ON(test_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags)); + WARN_ON(con_flag_test(con, CON_FLAG_WRITE_PENDING)); + WARN_ON(con_flag_test(con, CON_FLAG_KEEPALIVE_PENDING)); } } @@ -2511,7 +2577,7 @@ void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) /* if there wasn't anything waiting to send before, queue * new work */ - if (test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) + if (con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_send); @@ -2600,8 +2666,8 @@ void ceph_con_keepalive(struct ceph_connection *con) mutex_lock(&con->mutex); clear_standby(con); mutex_unlock(&con->mutex); - if (test_and_set_bit(CON_FLAG_KEEPALIVE_PENDING, &con->flags) == 0 && - test_and_set_bit(CON_FLAG_WRITE_PENDING, &con->flags) == 0) + if (con_flag_test_and_set(con, CON_FLAG_KEEPALIVE_PENDING) == 0 && + con_flag_test_and_set(con, CON_FLAG_WRITE_PENDING) == 0) queue_con(con); } EXPORT_SYMBOL(ceph_con_keepalive); @@ -2651,9 +2717,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, m->page_alignment = 0; m->pages = NULL; m->pagelist = NULL; +#ifdef CONFIG_BLOCK m->bio = NULL; m->bio_iter = NULL; m->bio_seg = 0; +#endif /* CONFIG_BLOCK */ m->trail = NULL; /* front */ diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 812eb3b46c1..aef5b1062be 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -697,7 +697,7 @@ int ceph_monc_delete_snapid(struct ceph_mon_client *monc, u32 pool, u64 snapid) { return do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, - pool, snapid, 0, 0); + pool, snapid, NULL, 0); } diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index eb9a4447876..d730dd4d8eb 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -23,7 +23,7 @@ static const struct ceph_connection_operations osd_con_ops; -static void send_queued(struct ceph_osd_client *osdc); +static void __send_queued(struct ceph_osd_client *osdc); static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd); static void __register_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); @@ -32,64 +32,12 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, static void __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req); -static int op_needs_trail(int op) -{ - switch (op) { - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_CMPXATTR: - case CEPH_OSD_OP_CALL: - case CEPH_OSD_OP_NOTIFY: - return 1; - default: - return 0; - } -} - static int op_has_extent(int op) { return (op == CEPH_OSD_OP_READ || op == CEPH_OSD_OP_WRITE); } -int ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, - u64 snapid, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op) -{ - struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; - u64 orig_len = *plen; - u64 objoff, objlen; /* extent in object */ - int r; - - reqhead->snapid = cpu_to_le64(snapid); - - /* object extent? */ - r = ceph_calc_file_object_mapping(layout, off, plen, bno, - &objoff, &objlen); - if (r < 0) - return r; - if (*plen < orig_len) - dout(" skipping last %llu, final file extent %llu~%llu\n", - orig_len - *plen, off, *plen); - - if (op_has_extent(op->op)) { - op->extent.offset = objoff; - op->extent.length = objlen; - } - req->r_num_pages = calc_pages_for(off, *plen); - req->r_page_alignment = off & ~PAGE_MASK; - if (op->op == CEPH_OSD_OP_WRITE) - op->payload_len = *plen; - - dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", - *bno, objoff, objlen, req->r_num_pages); - return 0; -} -EXPORT_SYMBOL(ceph_calc_raw_layout); - /* * Implement client access to distributed object storage cluster. * @@ -115,20 +63,48 @@ EXPORT_SYMBOL(ceph_calc_raw_layout); * * fill osd op in request message. */ -static int calc_layout(struct ceph_osd_client *osdc, - struct ceph_vino vino, +static int calc_layout(struct ceph_vino vino, struct ceph_file_layout *layout, u64 off, u64 *plen, struct ceph_osd_request *req, struct ceph_osd_req_op *op) { - u64 bno; + u64 orig_len = *plen; + u64 bno = 0; + u64 objoff = 0; + u64 objlen = 0; int r; - r = ceph_calc_raw_layout(osdc, layout, vino.snap, off, - plen, &bno, req, op); + /* object extent? */ + r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno, + &objoff, &objlen); if (r < 0) return r; + if (objlen < orig_len) { + *plen = objlen; + dout(" skipping last %llu, final file extent %llu~%llu\n", + orig_len - *plen, off, *plen); + } + + if (op_has_extent(op->op)) { + u32 osize = le32_to_cpu(layout->fl_object_size); + op->extent.offset = objoff; + op->extent.length = objlen; + if (op->extent.truncate_size <= off - objoff) { + op->extent.truncate_size = 0; + } else { + op->extent.truncate_size -= off - objoff; + if (op->extent.truncate_size > osize) + op->extent.truncate_size = osize; + } + } + req->r_num_pages = calc_pages_for(off, *plen); + req->r_page_alignment = off & ~PAGE_MASK; + if (op->op == CEPH_OSD_OP_WRITE) + op->payload_len = *plen; + + dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", + bno, objoff, objlen, req->r_num_pages); snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); req->r_oid_len = strlen(req->r_oid); @@ -148,25 +124,19 @@ void ceph_osdc_release_request(struct kref *kref) if (req->r_request) ceph_msg_put(req->r_request); if (req->r_con_filling_msg) { - dout("%s revoking pages %p from con %p\n", __func__, - req->r_pages, req->r_con_filling_msg); + dout("%s revoking msg %p from con %p\n", __func__, + req->r_reply, req->r_con_filling_msg); ceph_msg_revoke_incoming(req->r_reply); req->r_con_filling_msg->ops->put(req->r_con_filling_msg); + req->r_con_filling_msg = NULL; } if (req->r_reply) ceph_msg_put(req->r_reply); if (req->r_own_pages) ceph_release_page_vector(req->r_pages, req->r_num_pages); -#ifdef CONFIG_BLOCK - if (req->r_bio) - bio_put(req->r_bio); -#endif ceph_put_snap_context(req->r_snapc); - if (req->r_trail) { - ceph_pagelist_release(req->r_trail); - kfree(req->r_trail); - } + ceph_pagelist_release(&req->r_trail); if (req->r_mempool) mempool_free(req, req->r_osdc->req_mempool); else @@ -174,37 +144,25 @@ void ceph_osdc_release_request(struct kref *kref) } EXPORT_SYMBOL(ceph_osdc_release_request); -static int get_num_ops(struct ceph_osd_req_op *ops, int *needs_trail) -{ - int i = 0; - - if (needs_trail) - *needs_trail = 0; - while (ops[i].op) { - if (needs_trail && op_needs_trail(ops[i].op)) - *needs_trail = 1; - i++; - } - - return i; -} - struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, + unsigned int num_ops, bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio) + gfp_t gfp_flags) { struct ceph_osd_request *req; struct ceph_msg *msg; - int needs_trail; - int num_op = get_num_ops(ops, &needs_trail); - size_t msg_size = sizeof(struct ceph_osd_request_head); - - msg_size += num_op*sizeof(struct ceph_osd_op); + size_t msg_size; + + msg_size = 4 + 4 + 8 + 8 + 4+8; + msg_size += 2 + 4 + 8 + 4 + 4; /* oloc */ + msg_size += 1 + 8 + 4 + 4; /* pg_t */ + msg_size += 4 + MAX_OBJ_NAME_SIZE; + msg_size += 2 + num_ops*sizeof(struct ceph_osd_op); + msg_size += 8; /* snapid */ + msg_size += 8; /* snap_seq */ + msg_size += 8 * (snapc ? snapc->num_snaps : 0); /* snaps */ + msg_size += 4; if (use_mempool) { req = mempool_alloc(osdc->req_mempool, gfp_flags); @@ -228,10 +186,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, INIT_LIST_HEAD(&req->r_req_lru_item); INIT_LIST_HEAD(&req->r_osd_item); - req->r_flags = flags; - - WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); - /* create reply message */ if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); @@ -244,20 +198,9 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, } req->r_reply = msg; - /* allocate space for the trailing data */ - if (needs_trail) { - req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); - if (!req->r_trail) { - ceph_osdc_put_request(req); - return NULL; - } - ceph_pagelist_init(req->r_trail); - } + ceph_pagelist_init(&req->r_trail); /* create request message; allow space for oid */ - msg_size += MAX_OBJ_NAME_SIZE; - if (snapc) - msg_size += sizeof(u64) * snapc->num_snaps; if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op, 0); else @@ -270,13 +213,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, memset(msg->front.iov_base, 0, msg->front.iov_len); req->r_request = msg; - req->r_pages = pages; -#ifdef CONFIG_BLOCK - if (bio) { - req->r_bio = bio; - bio_get(req->r_bio); - } -#endif return req; } @@ -289,6 +225,8 @@ static void osd_req_encode_op(struct ceph_osd_request *req, dst->op = cpu_to_le16(src->op); switch (src->op) { + case CEPH_OSD_OP_STAT: + break; case CEPH_OSD_OP_READ: case CEPH_OSD_OP_WRITE: dst->extent.offset = @@ -300,52 +238,20 @@ static void osd_req_encode_op(struct ceph_osd_request *req, dst->extent.truncate_seq = cpu_to_le32(src->extent.truncate_seq); break; - - case CEPH_OSD_OP_GETXATTR: - case CEPH_OSD_OP_SETXATTR: - case CEPH_OSD_OP_CMPXATTR: - BUG_ON(!req->r_trail); - - dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); - dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); - dst->xattr.cmp_op = src->xattr.cmp_op; - dst->xattr.cmp_mode = src->xattr.cmp_mode; - ceph_pagelist_append(req->r_trail, src->xattr.name, - src->xattr.name_len); - ceph_pagelist_append(req->r_trail, src->xattr.val, - src->xattr.value_len); - break; case CEPH_OSD_OP_CALL: - BUG_ON(!req->r_trail); - dst->cls.class_len = src->cls.class_len; dst->cls.method_len = src->cls.method_len; dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); - ceph_pagelist_append(req->r_trail, src->cls.class_name, + ceph_pagelist_append(&req->r_trail, src->cls.class_name, src->cls.class_len); - ceph_pagelist_append(req->r_trail, src->cls.method_name, + ceph_pagelist_append(&req->r_trail, src->cls.method_name, src->cls.method_len); - ceph_pagelist_append(req->r_trail, src->cls.indata, + ceph_pagelist_append(&req->r_trail, src->cls.indata, src->cls.indata_len); break; - case CEPH_OSD_OP_ROLLBACK: - dst->snap.snapid = cpu_to_le64(src->snap.snapid); - break; case CEPH_OSD_OP_STARTSYNC: break; - case CEPH_OSD_OP_NOTIFY: - { - __le32 prot_ver = cpu_to_le32(src->watch.prot_ver); - __le32 timeout = cpu_to_le32(src->watch.timeout); - - BUG_ON(!req->r_trail); - - ceph_pagelist_append(req->r_trail, - &prot_ver, sizeof(prot_ver)); - ceph_pagelist_append(req->r_trail, - &timeout, sizeof(timeout)); - } case CEPH_OSD_OP_NOTIFY_ACK: case CEPH_OSD_OP_WATCH: dst->watch.cookie = cpu_to_le64(src->watch.cookie); @@ -356,6 +262,64 @@ static void osd_req_encode_op(struct ceph_osd_request *req, pr_err("unrecognized osd opcode %d\n", dst->op); WARN_ON(1); break; + case CEPH_OSD_OP_MAPEXT: + case CEPH_OSD_OP_MASKTRUNC: + case CEPH_OSD_OP_SPARSE_READ: + case CEPH_OSD_OP_NOTIFY: + case CEPH_OSD_OP_ASSERT_VER: + case CEPH_OSD_OP_WRITEFULL: + case CEPH_OSD_OP_TRUNCATE: + case CEPH_OSD_OP_ZERO: + case CEPH_OSD_OP_DELETE: + case CEPH_OSD_OP_APPEND: + case CEPH_OSD_OP_SETTRUNC: + case CEPH_OSD_OP_TRIMTRUNC: + case CEPH_OSD_OP_TMAPUP: + case CEPH_OSD_OP_TMAPPUT: + case CEPH_OSD_OP_TMAPGET: + case CEPH_OSD_OP_CREATE: + case CEPH_OSD_OP_ROLLBACK: + case CEPH_OSD_OP_OMAPGETKEYS: + case CEPH_OSD_OP_OMAPGETVALS: + case CEPH_OSD_OP_OMAPGETHEADER: + case CEPH_OSD_OP_OMAPGETVALSBYKEYS: + case CEPH_OSD_OP_MODE_RD: + case CEPH_OSD_OP_OMAPSETVALS: + case CEPH_OSD_OP_OMAPSETHEADER: + case CEPH_OSD_OP_OMAPCLEAR: + case CEPH_OSD_OP_OMAPRMKEYS: + case CEPH_OSD_OP_OMAP_CMP: + case CEPH_OSD_OP_CLONERANGE: + case CEPH_OSD_OP_ASSERT_SRC_VERSION: + case CEPH_OSD_OP_SRC_CMPXATTR: + case CEPH_OSD_OP_GETXATTR: + case CEPH_OSD_OP_GETXATTRS: + case CEPH_OSD_OP_CMPXATTR: + case CEPH_OSD_OP_SETXATTR: + case CEPH_OSD_OP_SETXATTRS: + case CEPH_OSD_OP_RESETXATTRS: + case CEPH_OSD_OP_RMXATTR: + case CEPH_OSD_OP_PULL: + case CEPH_OSD_OP_PUSH: + case CEPH_OSD_OP_BALANCEREADS: + case CEPH_OSD_OP_UNBALANCEREADS: + case CEPH_OSD_OP_SCRUB: + case CEPH_OSD_OP_SCRUB_RESERVE: + case CEPH_OSD_OP_SCRUB_UNRESERVE: + case CEPH_OSD_OP_SCRUB_STOP: + case CEPH_OSD_OP_SCRUB_MAP: + case CEPH_OSD_OP_WRLOCK: + case CEPH_OSD_OP_WRUNLOCK: + case CEPH_OSD_OP_RDLOCK: + case CEPH_OSD_OP_RDUNLOCK: + case CEPH_OSD_OP_UPLOCK: + case CEPH_OSD_OP_DNLOCK: + case CEPH_OSD_OP_PGLS: + case CEPH_OSD_OP_PGLS_FILTER: + pr_err("unsupported osd opcode %s\n", + ceph_osd_op_name(dst->op)); + WARN_ON(1); + break; } dst->payload_len = cpu_to_le32(src->payload_len); } @@ -365,75 +329,95 @@ static void osd_req_encode_op(struct ceph_osd_request *req, * */ void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, + u64 off, u64 len, unsigned int num_ops, struct ceph_osd_req_op *src_ops, - struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len) + struct ceph_snap_context *snapc, u64 snap_id, + struct timespec *mtime) { struct ceph_msg *msg = req->r_request; - struct ceph_osd_request_head *head; struct ceph_osd_req_op *src_op; - struct ceph_osd_op *op; void *p; - int num_op = get_num_ops(src_ops, NULL); - size_t msg_size = sizeof(*head) + num_op*sizeof(*op); + size_t msg_size; int flags = req->r_flags; - u64 data_len = 0; + u64 data_len; int i; - head = msg->front.iov_base; - op = (void *)(head + 1); - p = (void *)(op + num_op); - + req->r_num_ops = num_ops; + req->r_snapid = snap_id; req->r_snapc = ceph_get_snap_context(snapc); - head->client_inc = cpu_to_le32(1); /* always, for now. */ - head->flags = cpu_to_le32(flags); - if (flags & CEPH_OSD_FLAG_WRITE) - ceph_encode_timespec(&head->mtime, mtime); - head->num_ops = cpu_to_le16(num_op); - - - /* fill in oid */ - head->object_len = cpu_to_le32(oid_len); - memcpy(p, oid, oid_len); - p += oid_len; + /* encode request */ + msg->hdr.version = cpu_to_le16(4); + p = msg->front.iov_base; + ceph_encode_32(&p, 1); /* client_inc is always 1 */ + req->r_request_osdmap_epoch = p; + p += 4; + req->r_request_flags = p; + p += 4; + if (req->r_flags & CEPH_OSD_FLAG_WRITE) + ceph_encode_timespec(p, mtime); + p += sizeof(struct ceph_timespec); + req->r_request_reassert_version = p; + p += sizeof(struct ceph_eversion); /* will get filled in */ + + /* oloc */ + ceph_encode_8(&p, 4); + ceph_encode_8(&p, 4); + ceph_encode_32(&p, 8 + 4 + 4); + req->r_request_pool = p; + p += 8; + ceph_encode_32(&p, -1); /* preferred */ + ceph_encode_32(&p, 0); /* key len */ + + ceph_encode_8(&p, 1); + req->r_request_pgid = p; + p += 8 + 4; + ceph_encode_32(&p, -1); /* preferred */ + + /* oid */ + ceph_encode_32(&p, req->r_oid_len); + memcpy(p, req->r_oid, req->r_oid_len); + dout("oid '%.*s' len %d\n", req->r_oid_len, req->r_oid, req->r_oid_len); + p += req->r_oid_len; + + /* ops */ + ceph_encode_16(&p, num_ops); src_op = src_ops; - while (src_op->op) { - osd_req_encode_op(req, op, src_op); - src_op++; - op++; + req->r_request_ops = p; + for (i = 0; i < num_ops; i++, src_op++) { + osd_req_encode_op(req, p, src_op); + p += sizeof(struct ceph_osd_op); } - if (req->r_trail) - data_len += req->r_trail->length; - - if (snapc) { - head->snap_seq = cpu_to_le64(snapc->seq); - head->num_snaps = cpu_to_le32(snapc->num_snaps); + /* snaps */ + ceph_encode_64(&p, req->r_snapid); + ceph_encode_64(&p, req->r_snapc ? req->r_snapc->seq : 0); + ceph_encode_32(&p, req->r_snapc ? req->r_snapc->num_snaps : 0); + if (req->r_snapc) { for (i = 0; i < snapc->num_snaps; i++) { - put_unaligned_le64(snapc->snaps[i], p); - p += sizeof(u64); + ceph_encode_64(&p, req->r_snapc->snaps[i]); } } + req->r_request_attempts = p; + p += 4; + + data_len = req->r_trail.length; if (flags & CEPH_OSD_FLAG_WRITE) { req->r_request->hdr.data_off = cpu_to_le16(off); - req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len); - } else if (data_len) { - req->r_request->hdr.data_off = 0; - req->r_request->hdr.data_len = cpu_to_le32(data_len); + data_len += len; } - + req->r_request->hdr.data_len = cpu_to_le32(data_len); req->r_request->page_alignment = req->r_page_alignment; BUG_ON(p > msg->front.iov_base + msg->front.iov_len); msg_size = p - msg->front.iov_base; msg->front.iov_len = msg_size; msg->hdr.front_len = cpu_to_le32(msg_size); + + dout("build_request msg_size was %d num_ops %d\n", (int)msg_size, + num_ops); return; } EXPORT_SYMBOL(ceph_osdc_build_request); @@ -459,34 +443,33 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply, + bool use_mempool, int page_align) { - struct ceph_osd_req_op ops[3]; + struct ceph_osd_req_op ops[2]; struct ceph_osd_request *req; + unsigned int num_op = 1; int r; + memset(&ops, 0, sizeof ops); + ops[0].op = opcode; ops[0].extent.truncate_seq = truncate_seq; ops[0].extent.truncate_size = truncate_size; - ops[0].payload_len = 0; if (do_sync) { ops[1].op = CEPH_OSD_OP_STARTSYNC; - ops[1].payload_len = 0; - ops[2].op = 0; - } else - ops[1].op = 0; - - req = ceph_osdc_alloc_request(osdc, flags, - snapc, ops, - use_mempool, - GFP_NOFS, NULL, NULL); + num_op++; + } + + req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool, + GFP_NOFS); if (!req) return ERR_PTR(-ENOMEM); + req->r_flags = flags; /* calculate max write size */ - r = calc_layout(osdc, vino, layout, off, plen, req, ops); + r = calc_layout(vino, layout, off, plen, req, ops); if (r < 0) return ERR_PTR(r); req->r_file_layout = *layout; /* keep a copy */ @@ -496,10 +479,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; - ceph_osdc_build_request(req, off, plen, ops, - snapc, - mtime, - req->r_oid, req->r_oid_len); + ceph_osdc_build_request(req, off, *plen, num_op, ops, + snapc, vino.snap, mtime); return req; } @@ -623,8 +604,8 @@ static void osd_reset(struct ceph_connection *con) down_read(&osdc->map_sem); mutex_lock(&osdc->request_mutex); __kick_osd_requests(osdc, osd); + __send_queued(osdc); mutex_unlock(&osdc->request_mutex); - send_queued(osdc); up_read(&osdc->map_sem); } @@ -739,31 +720,35 @@ static void remove_old_osds(struct ceph_osd_client *osdc) */ static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) { - struct ceph_osd_request *req; - int ret = 0; + struct ceph_entity_addr *peer_addr; dout("__reset_osd %p osd%d\n", osd, osd->o_osd); if (list_empty(&osd->o_requests) && list_empty(&osd->o_linger_requests)) { __remove_osd(osdc, osd); - ret = -ENODEV; - } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], - &osd->o_con.peer_addr, - sizeof(osd->o_con.peer_addr)) == 0 && - !ceph_con_opened(&osd->o_con)) { + + return -ENODEV; + } + + peer_addr = &osdc->osdmap->osd_addr[osd->o_osd]; + if (!memcmp(peer_addr, &osd->o_con.peer_addr, sizeof (*peer_addr)) && + !ceph_con_opened(&osd->o_con)) { + struct ceph_osd_request *req; + dout(" osd addr hasn't changed and connection never opened," " letting msgr retry"); /* touch each r_stamp for handle_timeout()'s benfit */ list_for_each_entry(req, &osd->o_requests, r_osd_item) req->r_stamp = jiffies; - ret = -EAGAIN; - } else { - ceph_con_close(&osd->o_con); - ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, - &osdc->osdmap->osd_addr[osd->o_osd]); - osd->o_incarnation++; + + return -EAGAIN; } - return ret; + + ceph_con_close(&osd->o_con); + ceph_con_open(&osd->o_con, CEPH_ENTITY_TYPE_OSD, osd->o_osd, peer_addr); + osd->o_incarnation++; + + return 0; } static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) @@ -961,20 +946,18 @@ EXPORT_SYMBOL(ceph_osdc_set_request_linger); static int __map_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req, int force_resend) { - struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; struct ceph_pg pgid; int acting[CEPH_PG_MAX_SIZE]; int o = -1, num = 0; int err; dout("map_request %p tid %lld\n", req, req->r_tid); - err = ceph_calc_object_layout(&reqhead->layout, req->r_oid, + err = ceph_calc_object_layout(&pgid, req->r_oid, &req->r_file_layout, osdc->osdmap); if (err) { list_move(&req->r_req_lru_item, &osdc->req_notarget); return err; } - pgid = reqhead->layout.ol_pgid; req->r_pgid = pgid; err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting); @@ -991,8 +974,8 @@ static int __map_request(struct ceph_osd_client *osdc, (req->r_osd == NULL && o == -1)) return 0; /* no change */ - dout("map_request tid %llu pgid %d.%x osd%d (was osd%d)\n", - req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o, + dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n", + req->r_tid, pgid.pool, pgid.seed, o, req->r_osd ? req->r_osd->o_osd : -1); /* record full pg acting set */ @@ -1041,15 +1024,22 @@ out: static void __send_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { - struct ceph_osd_request_head *reqhead; - - dout("send_request %p tid %llu to osd%d flags %d\n", - req, req->r_tid, req->r_osd->o_osd, req->r_flags); + void *p; - reqhead = req->r_request->front.iov_base; - reqhead->osdmap_epoch = cpu_to_le32(osdc->osdmap->epoch); - reqhead->flags |= cpu_to_le32(req->r_flags); /* e.g., RETRY */ - reqhead->reassert_version = req->r_reassert_version; + dout("send_request %p tid %llu to osd%d flags %d pg %lld.%x\n", + req, req->r_tid, req->r_osd->o_osd, req->r_flags, + (unsigned long long)req->r_pgid.pool, req->r_pgid.seed); + + /* fill in message content that changes each time we send it */ + put_unaligned_le32(osdc->osdmap->epoch, req->r_request_osdmap_epoch); + put_unaligned_le32(req->r_flags, req->r_request_flags); + put_unaligned_le64(req->r_pgid.pool, req->r_request_pool); + p = req->r_request_pgid; + ceph_encode_64(&p, req->r_pgid.pool); + ceph_encode_32(&p, req->r_pgid.seed); + put_unaligned_le64(1, req->r_request_attempts); /* FIXME */ + memcpy(req->r_request_reassert_version, &req->r_reassert_version, + sizeof(req->r_reassert_version)); req->r_stamp = jiffies; list_move_tail(&req->r_req_lru_item, &osdc->req_lru); @@ -1062,16 +1052,13 @@ static void __send_request(struct ceph_osd_client *osdc, /* * Send any requests in the queue (req_unsent). */ -static void send_queued(struct ceph_osd_client *osdc) +static void __send_queued(struct ceph_osd_client *osdc) { struct ceph_osd_request *req, *tmp; - dout("send_queued\n"); - mutex_lock(&osdc->request_mutex); - list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) { + dout("__send_queued\n"); + list_for_each_entry_safe(req, tmp, &osdc->req_unsent, r_req_lru_item) __send_request(osdc, req); - } - mutex_unlock(&osdc->request_mutex); } /* @@ -1123,8 +1110,8 @@ static void handle_timeout(struct work_struct *work) } __schedule_osd_timeout(osdc); + __send_queued(osdc); mutex_unlock(&osdc->request_mutex); - send_queued(osdc); up_read(&osdc->map_sem); } @@ -1152,6 +1139,26 @@ static void complete_request(struct ceph_osd_request *req) complete_all(&req->r_safe_completion); /* fsync waiter */ } +static int __decode_pgid(void **p, void *end, struct ceph_pg *pgid) +{ + __u8 v; + + ceph_decode_need(p, end, 1 + 8 + 4 + 4, bad); + v = ceph_decode_8(p); + if (v > 1) { + pr_warning("do not understand pg encoding %d > 1", v); + return -EINVAL; + } + pgid->pool = ceph_decode_64(p); + pgid->seed = ceph_decode_32(p); + *p += 4; + return 0; + +bad: + pr_warning("incomplete pg encoding"); + return -EINVAL; +} + /* * handle osd op reply. either call the callback if it is specified, * or do the completion to wake up the waiting thread. @@ -1159,22 +1166,42 @@ static void complete_request(struct ceph_osd_request *req) static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, struct ceph_connection *con) { - struct ceph_osd_reply_head *rhead = msg->front.iov_base; + void *p, *end; struct ceph_osd_request *req; u64 tid; - int numops, object_len, flags; + int object_len; + int numops, payload_len, flags; s32 result; + s32 retry_attempt; + struct ceph_pg pg; + int err; + u32 reassert_epoch; + u64 reassert_version; + u32 osdmap_epoch; + int i; tid = le64_to_cpu(msg->hdr.tid); - if (msg->front.iov_len < sizeof(*rhead)) - goto bad; - numops = le32_to_cpu(rhead->num_ops); - object_len = le32_to_cpu(rhead->object_len); - result = le32_to_cpu(rhead->result); - if (msg->front.iov_len != sizeof(*rhead) + object_len + - numops * sizeof(struct ceph_osd_op)) + dout("handle_reply %p tid %llu\n", msg, tid); + + p = msg->front.iov_base; + end = p + msg->front.iov_len; + + ceph_decode_need(&p, end, 4, bad); + object_len = ceph_decode_32(&p); + ceph_decode_need(&p, end, object_len, bad); + p += object_len; + + err = __decode_pgid(&p, end, &pg); + if (err) goto bad; - dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result); + + ceph_decode_need(&p, end, 8 + 4 + 4 + 8 + 4, bad); + flags = ceph_decode_64(&p); + result = ceph_decode_32(&p); + reassert_epoch = ceph_decode_32(&p); + reassert_version = ceph_decode_64(&p); + osdmap_epoch = ceph_decode_32(&p); + /* lookup */ mutex_lock(&osdc->request_mutex); req = __lookup_request(osdc, tid); @@ -1184,7 +1211,38 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, return; } ceph_osdc_get_request(req); - flags = le32_to_cpu(rhead->flags); + + dout("handle_reply %p tid %llu req %p result %d\n", msg, tid, + req, result); + + ceph_decode_need(&p, end, 4, bad); + numops = ceph_decode_32(&p); + if (numops > CEPH_OSD_MAX_OP) + goto bad_put; + if (numops != req->r_num_ops) + goto bad_put; + payload_len = 0; + ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad); + for (i = 0; i < numops; i++) { + struct ceph_osd_op *op = p; + int len; + + len = le32_to_cpu(op->payload_len); + req->r_reply_op_len[i] = len; + dout(" op %d has %d bytes\n", i, len); + payload_len += len; + p += sizeof(*op); + } + if (payload_len != le32_to_cpu(msg->hdr.data_len)) { + pr_warning("sum of op payload lens %d != data_len %d", + payload_len, le32_to_cpu(msg->hdr.data_len)); + goto bad_put; + } + + ceph_decode_need(&p, end, 4 + numops * 4, bad); + retry_attempt = ceph_decode_32(&p); + for (i = 0; i < numops; i++) + req->r_reply_op_result[i] = ceph_decode_32(&p); /* * if this connection filled our message, drop our reference now, to @@ -1199,7 +1257,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, if (!req->r_got_reply) { unsigned int bytes; - req->r_result = le32_to_cpu(rhead->result); + req->r_result = result; bytes = le32_to_cpu(msg->hdr.data_len); dout("handle_reply result %d bytes %d\n", req->r_result, bytes); @@ -1207,7 +1265,8 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, req->r_result = bytes; /* in case this is a write and we need to replay, */ - req->r_reassert_version = rhead->reassert_version; + req->r_reassert_version.epoch = cpu_to_le32(reassert_epoch); + req->r_reassert_version.version = cpu_to_le64(reassert_version); req->r_got_reply = 1; } else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) { @@ -1242,10 +1301,11 @@ done: ceph_osdc_put_request(req); return; +bad_put: + ceph_osdc_put_request(req); bad: - pr_err("corrupt osd_op_reply got %d %d expected %d\n", - (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len), - (int)sizeof(*rhead)); + pr_err("corrupt osd_op_reply got %d %d\n", + (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len)); ceph_msg_dump(msg); } @@ -1462,7 +1522,9 @@ done: if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) ceph_monc_request_next_osdmap(&osdc->client->monc); - send_queued(osdc); + mutex_lock(&osdc->request_mutex); + __send_queued(osdc); + mutex_unlock(&osdc->request_mutex); up_read(&osdc->map_sem); wake_up_all(&osdc->client->auth_wq); return; @@ -1556,8 +1618,7 @@ static void __remove_event(struct ceph_osd_event *event) int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), - int one_shot, void *data, - struct ceph_osd_event **pevent) + void *data, struct ceph_osd_event **pevent) { struct ceph_osd_event *event; @@ -1567,14 +1628,13 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, dout("create_event %p\n", event); event->cb = event_cb; - event->one_shot = one_shot; + event->one_shot = 0; event->data = data; event->osdc = osdc; INIT_LIST_HEAD(&event->osd_node); RB_CLEAR_NODE(&event->node); kref_init(&event->kref); /* one ref for us */ kref_get(&event->kref); /* one ref for the caller */ - init_completion(&event->completion); spin_lock(&osdc->event_lock); event->cookie = ++osdc->event_count; @@ -1610,7 +1670,6 @@ static void do_event_work(struct work_struct *work) dout("do_event_work completing %p\n", event); event->cb(ver, notify_id, opcode, event->data); - complete(&event->completion); dout("do_event_work completed %p\n", event); ceph_osdc_put_event(event); kfree(event_work); @@ -1620,7 +1679,8 @@ static void do_event_work(struct work_struct *work) /* * Process osd watch notifications */ -void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) +static void handle_watch_notify(struct ceph_osd_client *osdc, + struct ceph_msg *msg) { void *p, *end; u8 proto_ver; @@ -1641,9 +1701,8 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) spin_lock(&osdc->event_lock); event = __find_event(osdc, cookie); if (event) { + BUG_ON(event->one_shot); get_event(event); - if (event->one_shot) - __remove_event(event); } spin_unlock(&osdc->event_lock); dout("handle_watch_notify cookie %lld ver %lld event %p\n", @@ -1668,7 +1727,6 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) return; done_err: - complete(&event->completion); ceph_osdc_put_event(event); return; @@ -1677,21 +1735,6 @@ bad: return; } -int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout) -{ - int err; - - dout("wait_event %p\n", event); - err = wait_for_completion_interruptible_timeout(&event->completion, - timeout * HZ); - ceph_osdc_put_event(event); - if (err > 0) - err = 0; - dout("wait_event %p returns %d\n", event, err); - return err; -} -EXPORT_SYMBOL(ceph_osdc_wait_event); - /* * Register request, send initial attempt. */ @@ -1706,7 +1749,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, #ifdef CONFIG_BLOCK req->r_request->bio = req->r_bio; #endif - req->r_request->trail = req->r_trail; + req->r_request->trail = &req->r_trail; register_request(osdc, req); @@ -1865,7 +1908,6 @@ out_mempool: out: return err; } -EXPORT_SYMBOL(ceph_osdc_init); void ceph_osdc_stop(struct ceph_osd_client *osdc) { @@ -1882,7 +1924,6 @@ void ceph_osdc_stop(struct ceph_osd_client *osdc) ceph_msgpool_destroy(&osdc->msgpool_op); ceph_msgpool_destroy(&osdc->msgpool_op_reply); } -EXPORT_SYMBOL(ceph_osdc_stop); /* * Read some contiguous pages. If we cross a stripe boundary, shorten @@ -1902,7 +1943,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, req = ceph_osdc_new_request(osdc, layout, vino, off, plen, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, truncate_seq, truncate_size, NULL, - false, 1, page_align); + false, page_align); if (IS_ERR(req)) return PTR_ERR(req); @@ -1931,8 +1972,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - struct page **pages, int num_pages, - int flags, int do_sync, bool nofail) + struct page **pages, int num_pages) { struct ceph_osd_request *req; int rc = 0; @@ -1941,11 +1981,10 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, BUG_ON(vino.snap != CEPH_NOSNAP); req = ceph_osdc_new_request(osdc, layout, vino, off, &len, CEPH_OSD_OP_WRITE, - flags | CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE, - snapc, do_sync, + CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, + snapc, 0, truncate_seq, truncate_size, mtime, - nofail, 1, page_align); + true, page_align); if (IS_ERR(req)) return PTR_ERR(req); @@ -1954,7 +1993,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, dout("writepages %llu~%llu (%d pages)\n", off, len, req->r_num_pages); - rc = ceph_osdc_start_request(osdc, req, nofail); + rc = ceph_osdc_start_request(osdc, req, true); if (!rc) rc = ceph_osdc_wait_request(osdc, req); @@ -2047,7 +2086,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, if (data_len > 0) { int want = calc_pages_for(req->r_page_alignment, data_len); - if (unlikely(req->r_num_pages < want)) { + if (req->r_pages && unlikely(req->r_num_pages < want)) { pr_warning("tid %lld reply has %d bytes %d pages, we" " had only %d pages ready\n", tid, data_len, want, req->r_num_pages); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index de73214b5d2..69bc4bf89e3 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -13,26 +13,18 @@ char *ceph_osdmap_state_str(char *str, int len, int state) { - int flag = 0; - if (!len) - goto done; - - *str = '\0'; - if (state) { - if (state & CEPH_OSD_EXISTS) { - snprintf(str, len, "exists"); - flag = 1; - } - if (state & CEPH_OSD_UP) { - snprintf(str, len, "%s%s%s", str, (flag ? ", " : ""), - "up"); - flag = 1; - } - } else { + return str; + + if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP)) + snprintf(str, len, "exists, up"); + else if (state & CEPH_OSD_EXISTS) + snprintf(str, len, "exists"); + else if (state & CEPH_OSD_UP) + snprintf(str, len, "up"); + else snprintf(str, len, "doesn't exist"); - } -done: + return str; } @@ -53,13 +45,8 @@ static int calc_bits_of(unsigned int t) */ static void calc_pg_masks(struct ceph_pg_pool_info *pi) { - pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1; - pi->pgp_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1; - pi->lpg_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1; - pi->lpgp_num_mask = - (1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1; + pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1; + pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1; } /* @@ -170,6 +157,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) c->choose_local_tries = 2; c->choose_local_fallback_tries = 5; c->choose_total_tries = 19; + c->chooseleaf_descend_once = 0; ceph_decode_need(p, end, 4*sizeof(u32), bad); magic = ceph_decode_32(p); @@ -336,6 +324,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end) dout("crush decode tunable choose_total_tries = %d", c->choose_total_tries); + ceph_decode_need(p, end, sizeof(u32), done); + c->chooseleaf_descend_once = ceph_decode_32(p); + dout("crush decode tunable chooseleaf_descend_once = %d", + c->chooseleaf_descend_once); + done: dout("crush_decode success\n"); return c; @@ -354,12 +347,13 @@ bad: */ static int pgid_cmp(struct ceph_pg l, struct ceph_pg r) { - u64 a = *(u64 *)&l; - u64 b = *(u64 *)&r; - - if (a < b) + if (l.pool < r.pool) + return -1; + if (l.pool > r.pool) + return 1; + if (l.seed < r.seed) return -1; - if (a > b) + if (l.seed > r.seed) return 1; return 0; } @@ -405,8 +399,8 @@ static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, } else if (c > 0) { n = n->rb_right; } else { - dout("__lookup_pg_mapping %llx got %p\n", - *(u64 *)&pgid, pg); + dout("__lookup_pg_mapping %lld.%x got %p\n", + pgid.pool, pgid.seed, pg); return pg; } } @@ -418,12 +412,13 @@ static int __remove_pg_mapping(struct rb_root *root, struct ceph_pg pgid) struct ceph_pg_mapping *pg = __lookup_pg_mapping(root, pgid); if (pg) { - dout("__remove_pg_mapping %llx %p\n", *(u64 *)&pgid, pg); + dout("__remove_pg_mapping %lld.%x %p\n", pgid.pool, pgid.seed, + pg); rb_erase(&pg->node, root); kfree(pg); return 0; } - dout("__remove_pg_mapping %llx dne\n", *(u64 *)&pgid); + dout("__remove_pg_mapping %lld.%x dne\n", pgid.pool, pgid.seed); return -ENOENT; } @@ -452,7 +447,7 @@ static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new) return 0; } -static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) +static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, u64 id) { struct ceph_pg_pool_info *pi; struct rb_node *n = root->rb_node; @@ -508,24 +503,57 @@ static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) { - unsigned int n, m; + u8 ev, cv; + unsigned len, num; + void *pool_end; + + ceph_decode_need(p, end, 2 + 4, bad); + ev = ceph_decode_8(p); /* encoding version */ + cv = ceph_decode_8(p); /* compat version */ + if (ev < 5) { + pr_warning("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv); + return -EINVAL; + } + if (cv > 7) { + pr_warning("got v %d cv %d > 7 of ceph_pg_pool\n", ev, cv); + return -EINVAL; + } + len = ceph_decode_32(p); + ceph_decode_need(p, end, len, bad); + pool_end = *p + len; - ceph_decode_copy(p, &pi->v, sizeof(pi->v)); - calc_pg_masks(pi); + pi->type = ceph_decode_8(p); + pi->size = ceph_decode_8(p); + pi->crush_ruleset = ceph_decode_8(p); + pi->object_hash = ceph_decode_8(p); + + pi->pg_num = ceph_decode_32(p); + pi->pgp_num = ceph_decode_32(p); + + *p += 4 + 4; /* skip lpg* */ + *p += 4; /* skip last_change */ + *p += 8 + 4; /* skip snap_seq, snap_epoch */ - /* num_snaps * snap_info_t */ - n = le32_to_cpu(pi->v.num_snaps); - while (n--) { - ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + - sizeof(struct ceph_timespec), bad); - *p += sizeof(u64) + /* key */ - 1 + sizeof(u64) + /* u8, snapid */ - sizeof(struct ceph_timespec); - m = ceph_decode_32(p); /* snap name */ - *p += m; + /* skip snaps */ + num = ceph_decode_32(p); + while (num--) { + *p += 8; /* snapid key */ + *p += 1 + 1; /* versions */ + len = ceph_decode_32(p); + *p += len; } - *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; + /* skip removed snaps */ + num = ceph_decode_32(p); + *p += num * (8 + 8); + + *p += 8; /* skip auid */ + pi->flags = ceph_decode_64(p); + + /* ignore the rest */ + + *p = pool_end; + calc_pg_masks(pi); return 0; bad: @@ -535,14 +563,15 @@ bad: static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) { struct ceph_pg_pool_info *pi; - u32 num, len, pool; + u32 num, len; + u64 pool; ceph_decode_32_safe(p, end, num, bad); dout(" %d pool names\n", num); while (num--) { - ceph_decode_32_safe(p, end, pool, bad); + ceph_decode_64_safe(p, end, pool, bad); ceph_decode_32_safe(p, end, len, bad); - dout(" pool %d len %d\n", pool, len); + dout(" pool %llu len %d\n", pool, len); ceph_decode_need(p, end, len, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) { @@ -633,7 +662,6 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) struct ceph_osdmap *map; u16 version; u32 len, max, i; - u8 ev; int err = -EINVAL; void *start = *p; struct ceph_pg_pool_info *pi; @@ -646,9 +674,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) map->pg_temp = RB_ROOT; ceph_decode_16_safe(p, end, version, bad); - if (version > CEPH_OSDMAP_VERSION) { - pr_warning("got unknown v %d > %d of osdmap\n", version, - CEPH_OSDMAP_VERSION); + if (version > 6) { + pr_warning("got unknown v %d > 6 of osdmap\n", version); + goto bad; + } + if (version < 6) { + pr_warning("got old v %d < 6 of osdmap\n", version); goto bad; } @@ -660,20 +691,12 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) ceph_decode_32_safe(p, end, max, bad); while (max--) { - ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); + ceph_decode_need(p, end, 8 + 2, bad); err = -ENOMEM; pi = kzalloc(sizeof(*pi), GFP_NOFS); if (!pi) goto bad; - pi->id = ceph_decode_32(p); - err = -EINVAL; - ev = ceph_decode_8(p); /* encoding version */ - if (ev > CEPH_PG_POOL_VERSION) { - pr_warning("got unknown v %d > %d of ceph_pg_pool\n", - ev, CEPH_PG_POOL_VERSION); - kfree(pi); - goto bad; - } + pi->id = ceph_decode_64(p); err = __decode_pool(p, end, pi); if (err < 0) { kfree(pi); @@ -682,12 +705,10 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) __insert_pg_pool(&map->pg_pools, pi); } - if (version >= 5) { - err = __decode_pool_names(p, end, map); - if (err < 0) { - dout("fail to decode pool names"); - goto bad; - } + err = __decode_pool_names(p, end, map); + if (err < 0) { + dout("fail to decode pool names"); + goto bad; } ceph_decode_32_safe(p, end, map->pool_max, bad); @@ -724,10 +745,13 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) for (i = 0; i < len; i++) { int n, j; struct ceph_pg pgid; + struct ceph_pg_v1 pgid_v1; struct ceph_pg_mapping *pg; ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); - ceph_decode_copy(p, &pgid, sizeof(pgid)); + ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1)); + pgid.pool = le32_to_cpu(pgid_v1.pool); + pgid.seed = le16_to_cpu(pgid_v1.ps); n = ceph_decode_32(p); err = -EINVAL; if (n > (UINT_MAX - sizeof(*pg)) / sizeof(u32)) @@ -745,7 +769,8 @@ struct ceph_osdmap *osdmap_decode(void **p, void *end) err = __insert_pg_mapping(pg, &map->pg_temp); if (err) goto bad; - dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len); + dout(" added pg_temp %lld.%x len %d\n", pgid.pool, pgid.seed, + len); } /* crush */ @@ -784,16 +809,17 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_fsid fsid; u32 epoch = 0; struct ceph_timespec modified; - u32 len, pool; - __s32 new_pool_max, new_flags, max; + s32 len; + u64 pool; + __s64 new_pool_max; + __s32 new_flags, max; void *start = *p; int err = -EINVAL; u16 version; ceph_decode_16_safe(p, end, version, bad); - if (version > CEPH_OSDMAP_INC_VERSION) { - pr_warning("got unknown v %d > %d of inc osdmap\n", version, - CEPH_OSDMAP_INC_VERSION); + if (version > 6) { + pr_warning("got unknown v %d > %d of inc osdmap\n", version, 6); goto bad; } @@ -803,7 +829,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, epoch = ceph_decode_32(p); BUG_ON(epoch != map->epoch+1); ceph_decode_copy(p, &modified, sizeof(modified)); - new_pool_max = ceph_decode_32(p); + new_pool_max = ceph_decode_64(p); new_flags = ceph_decode_32(p); /* full map? */ @@ -853,18 +879,9 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, /* new_pool */ ceph_decode_32_safe(p, end, len, bad); while (len--) { - __u8 ev; struct ceph_pg_pool_info *pi; - ceph_decode_32_safe(p, end, pool, bad); - ceph_decode_need(p, end, 1 + sizeof(pi->v), bad); - ev = ceph_decode_8(p); /* encoding version */ - if (ev > CEPH_PG_POOL_VERSION) { - pr_warning("got unknown v %d > %d of ceph_pg_pool\n", - ev, CEPH_PG_POOL_VERSION); - err = -EINVAL; - goto bad; - } + ceph_decode_64_safe(p, end, pool, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (!pi) { pi = kzalloc(sizeof(*pi), GFP_NOFS); @@ -890,7 +907,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, while (len--) { struct ceph_pg_pool_info *pi; - ceph_decode_32_safe(p, end, pool, bad); + ceph_decode_64_safe(p, end, pool, bad); pi = __lookup_pg_pool(&map->pg_pools, pool); if (pi) __remove_pg_pool(&map->pg_pools, pi); @@ -946,10 +963,13 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, while (len--) { struct ceph_pg_mapping *pg; int j; + struct ceph_pg_v1 pgid_v1; struct ceph_pg pgid; u32 pglen; ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); - ceph_decode_copy(p, &pgid, sizeof(pgid)); + ceph_decode_copy(p, &pgid_v1, sizeof(pgid_v1)); + pgid.pool = le32_to_cpu(pgid_v1.pool); + pgid.seed = le16_to_cpu(pgid_v1.ps); pglen = ceph_decode_32(p); if (pglen) { @@ -975,8 +995,8 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, kfree(pg); goto bad; } - dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, - pglen); + dout(" added pg_temp %lld.%x len %d\n", pgid.pool, + pgid.seed, pglen); } else { /* remove */ __remove_pg_mapping(&map->pg_temp, pgid); @@ -1010,7 +1030,7 @@ bad: * pass a stride back to the caller. */ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 *plen, + u64 off, u64 len, u64 *ono, u64 *oxoff, u64 *oxlen) { @@ -1021,7 +1041,7 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u32 su_per_object; u64 t, su_offset; - dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen, + dout("mapping %llu~%llu osize %u fl_su %u\n", off, len, osize, su); if (su == 0 || sc == 0) goto invalid; @@ -1054,11 +1074,10 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, /* * Calculate the length of the extent being written to the selected - * object. This is the minimum of the full length requested (plen) or + * object. This is the minimum of the full length requested (len) or * the remainder of the current stripe being written to. */ - *oxlen = min_t(u64, *plen, su - su_offset); - *plen = *oxlen; + *oxlen = min_t(u64, len, su - su_offset); dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); return 0; @@ -1076,33 +1095,24 @@ EXPORT_SYMBOL(ceph_calc_file_object_mapping); * calculate an object layout (i.e. pgid) from an oid, * file_layout, and osdmap */ -int ceph_calc_object_layout(struct ceph_object_layout *ol, +int ceph_calc_object_layout(struct ceph_pg *pg, const char *oid, struct ceph_file_layout *fl, struct ceph_osdmap *osdmap) { unsigned int num, num_mask; - struct ceph_pg pgid; - int poolid = le32_to_cpu(fl->fl_pg_pool); struct ceph_pg_pool_info *pool; - unsigned int ps; BUG_ON(!osdmap); - - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); + pg->pool = le32_to_cpu(fl->fl_pg_pool); + pool = __lookup_pg_pool(&osdmap->pg_pools, pg->pool); if (!pool) return -EIO; - ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); - num = le32_to_cpu(pool->v.pg_num); + pg->seed = ceph_str_hash(pool->object_hash, oid, strlen(oid)); + num = pool->pg_num; num_mask = pool->pg_num_mask; - pgid.ps = cpu_to_le16(ps); - pgid.preferred = cpu_to_le16(-1); - pgid.pool = fl->fl_pg_pool; - dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps); - - ol->ol_pgid = pgid; - ol->ol_stripe_unit = fl->fl_object_stripe_unit; + dout("calc_object_layout '%s' pgid %lld.%x\n", oid, pg->pool, pg->seed); return 0; } EXPORT_SYMBOL(ceph_calc_object_layout); @@ -1117,19 +1127,16 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, struct ceph_pg_mapping *pg; struct ceph_pg_pool_info *pool; int ruleno; - unsigned int poolid, ps, pps, t, r; - - poolid = le32_to_cpu(pgid.pool); - ps = le16_to_cpu(pgid.ps); + int r; + u32 pps; - pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); + pool = __lookup_pg_pool(&osdmap->pg_pools, pgid.pool); if (!pool) return NULL; /* pg_temp? */ - t = ceph_stable_mod(ps, le32_to_cpu(pool->v.pg_num), - pool->pgp_num_mask); - pgid.ps = cpu_to_le16(t); + pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, + pool->pgp_num_mask); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { *num = pg->len; @@ -1137,26 +1144,39 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, } /* crush */ - ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset, - pool->v.type, pool->v.size); + ruleno = crush_find_rule(osdmap->crush, pool->crush_ruleset, + pool->type, pool->size); if (ruleno < 0) { - pr_err("no crush rule pool %d ruleset %d type %d size %d\n", - poolid, pool->v.crush_ruleset, pool->v.type, - pool->v.size); + pr_err("no crush rule pool %lld ruleset %d type %d size %d\n", + pgid.pool, pool->crush_ruleset, pool->type, + pool->size); return NULL; } - pps = ceph_stable_mod(ps, - le32_to_cpu(pool->v.pgp_num), - pool->pgp_num_mask); - pps += poolid; + if (pool->flags & CEPH_POOL_FLAG_HASHPSPOOL) { + /* hash pool id and seed sothat pool PGs do not overlap */ + pps = crush_hash32_2(CRUSH_HASH_RJENKINS1, + ceph_stable_mod(pgid.seed, pool->pgp_num, + pool->pgp_num_mask), + pgid.pool); + } else { + /* + * legacy ehavior: add ps and pool together. this is + * not a great approach because the PGs from each pool + * will overlap on top of each other: 0.5 == 1.4 == + * 2.3 == ... + */ + pps = ceph_stable_mod(pgid.seed, pool->pgp_num, + pool->pgp_num_mask) + + (unsigned)pgid.pool; + } r = crush_do_rule(osdmap->crush, ruleno, pps, osds, - min_t(int, pool->v.size, *num), + min_t(int, pool->size, *num), osdmap->osd_weight); if (r < 0) { - pr_err("error %d from crush rule: pool %d ruleset %d type %d" - " size %d\n", r, poolid, pool->v.crush_ruleset, - pool->v.type, pool->v.size); + pr_err("error %d from crush rule: pool %lld ruleset %d type %d" + " size %d\n", r, pgid.pool, pool->crush_ruleset, + pool->type, pool->size); return NULL; } *num = r; diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index cd9c21df87d..815a2249cfa 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -12,7 +12,7 @@ /* * build a vector of user pages */ -struct page **ceph_get_direct_page_vector(const char __user *data, +struct page **ceph_get_direct_page_vector(const void __user *data, int num_pages, bool write_page) { struct page **pages; @@ -93,7 +93,7 @@ EXPORT_SYMBOL(ceph_alloc_page_vector); * copy user data into a page vector */ int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, + const void __user *data, loff_t off, size_t len) { int i = 0; @@ -118,17 +118,17 @@ int ceph_copy_user_to_page_vector(struct page **pages, } EXPORT_SYMBOL(ceph_copy_user_to_page_vector); -int ceph_copy_to_page_vector(struct page **pages, - const char *data, +void ceph_copy_to_page_vector(struct page **pages, + const void *data, loff_t off, size_t len) { int i = 0; size_t po = off & ~PAGE_CACHE_MASK; size_t left = len; - size_t l; while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(page_address(pages[i]) + po, data, l); data += l; left -= l; @@ -138,21 +138,20 @@ int ceph_copy_to_page_vector(struct page **pages, i++; } } - return len; } EXPORT_SYMBOL(ceph_copy_to_page_vector); -int ceph_copy_from_page_vector(struct page **pages, - char *data, +void ceph_copy_from_page_vector(struct page **pages, + void *data, loff_t off, size_t len) { int i = 0; size_t po = off & ~PAGE_CACHE_MASK; size_t left = len; - size_t l; while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(data, page_address(pages[i]) + po, l); data += l; left -= l; @@ -162,7 +161,6 @@ int ceph_copy_from_page_vector(struct page **pages, i++; } } - return len; } EXPORT_SYMBOL(ceph_copy_from_page_vector); @@ -170,7 +168,7 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector); * copy user data from a page vector into a user pointer */ int ceph_copy_page_vector_to_user(struct page **pages, - char __user *data, + void __user *data, loff_t off, size_t len) { int i = 0; diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index d11418f97f1..a622ad64acd 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -17,7 +17,8 @@ */ #include <net/ipv6.h> -#include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> +#include <linux/sunrpc/msg_prot.h> #include <linux/slab.h> #include <linux/export.h> diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 107c4528654..88edec929d7 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -574,6 +574,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip; + /* Trim off the checksum blob */ + xdr_buf_trim(buf, GSS_KRB5_TOK_HDR_LEN + tailskip); return GSS_S_COMPLETE; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index ecd1d58bf61..f7d34e7b6f8 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -182,12 +182,6 @@ static void rsi_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int rsi_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, rsi_request); -} - - static int rsi_parse(struct cache_detail *cd, char *mesg, int mlen) { @@ -275,7 +269,7 @@ static struct cache_detail rsi_cache_template = { .hash_size = RSI_HASHMAX, .name = "auth.rpcsec.init", .cache_put = rsi_put, - .cache_upcall = rsi_upcall, + .cache_request = rsi_request, .cache_parse = rsi_parse, .match = rsi_match, .init = rsi_init, @@ -825,13 +819,17 @@ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) * The server uses base of head iovec as read pointer, while the * client uses separate pointer. */ static int -unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) +unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) { int stat = -EINVAL; u32 integ_len, maj_stat; struct xdr_netobj mic; struct xdr_buf integ_buf; + /* Did we already verify the signature on the original pass through? */ + if (rqstp->rq_deferred) + return 0; + integ_len = svc_getnl(&buf->head[0]); if (integ_len & 3) return stat; @@ -854,6 +852,8 @@ unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) goto out; if (svc_getnl(&buf->head[0]) != seq) goto out; + /* trim off the mic at the end before returning */ + xdr_buf_trim(buf, mic.len + 4); stat = 0; out: kfree(mic.data); @@ -1198,7 +1198,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) /* placeholders for length and seq. number: */ svc_putnl(resv, 0); svc_putnl(resv, 0); - if (unwrap_integ_data(&rqstp->rq_arg, + if (unwrap_integ_data(rqstp, &rqstp->rq_arg, gc->gc_seq, rsci->mechctx)) goto garbage_args; break; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 39a4112faf5..25d58e76601 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -196,9 +196,9 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_update); static int cache_make_upcall(struct cache_detail *cd, struct cache_head *h) { - if (!cd->cache_upcall) - return -EINVAL; - return cd->cache_upcall(cd, h); + if (cd->cache_upcall) + return cd->cache_upcall(cd, h); + return sunrpc_cache_pipe_upcall(cd, h); } static inline int cache_is_valid(struct cache_detail *detail, struct cache_head *h) @@ -750,6 +750,18 @@ struct cache_reader { int offset; /* if non-0, we have a refcnt on next request */ }; +static int cache_request(struct cache_detail *detail, + struct cache_request *crq) +{ + char *bp = crq->buf; + int len = PAGE_SIZE; + + detail->cache_request(detail, crq->item, &bp, &len); + if (len < 0) + return -EAGAIN; + return PAGE_SIZE - len; +} + static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos, struct cache_detail *cd) { @@ -784,6 +796,13 @@ static ssize_t cache_read(struct file *filp, char __user *buf, size_t count, rq->readers++; spin_unlock(&queue_lock); + if (rq->len == 0) { + err = cache_request(cd, rq); + if (err < 0) + goto out; + rq->len = err; + } + if (rp->offset == 0 && !test_bit(CACHE_PENDING, &rq->item->flags)) { err = -EAGAIN; spin_lock(&queue_lock); @@ -1140,17 +1159,14 @@ static bool cache_listeners_exist(struct cache_detail *detail) * * Each request is at most one page long. */ -int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, - void (*cache_request)(struct cache_detail *, - struct cache_head *, - char **, - int *)) +int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h) { char *buf; struct cache_request *crq; - char *bp; - int len; + + if (!detail->cache_request) + return -EINVAL; if (!cache_listeners_exist(detail)) { warn_no_listener(detail); @@ -1167,19 +1183,10 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h, return -EAGAIN; } - bp = buf; len = PAGE_SIZE; - - cache_request(detail, h, &bp, &len); - - if (len < 0) { - kfree(buf); - kfree(crq); - return -EAGAIN; - } crq->q.reader = 0; crq->item = cache_get(h); crq->buf = buf; - crq->len = PAGE_SIZE - len; + crq->len = 0; crq->readers = 0; spin_lock(&queue_lock); list_add_tail(&crq->q.list, &detail->queue); @@ -1605,7 +1612,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) if (p == NULL) goto out_nomem; - if (cd->cache_upcall || cd->cache_parse) { + if (cd->cache_request || cd->cache_parse) { p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, cd->u.procfs.proc_ent, &cache_file_operations_procfs, cd); @@ -1614,7 +1621,7 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net) goto out_nomem; } if (cd->cache_show) { - p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, + p = proc_create_data("content", S_IFREG|S_IRUSR, cd->u.procfs.proc_ent, &content_file_operations_procfs, cd); cd->u.procfs.content_ent = p; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index a9f7906c1a6..d7a369e6108 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -33,6 +33,7 @@ #include <linux/rcupdate.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/metrics.h> #include <linux/sunrpc/bc_xprt.h> diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 795a0f4e920..3df764dc330 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -26,6 +26,7 @@ #include <net/ipv6.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/xprtsock.h> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index dbf12ac5ecb..89a588b4478 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -515,15 +515,6 @@ EXPORT_SYMBOL_GPL(svc_create_pooled); void svc_shutdown_net(struct svc_serv *serv, struct net *net) { - /* - * The set of xprts (contained in the sv_tempsocks and - * sv_permsocks lists) is now constant, since it is modified - * only by accepting new sockets (done by service threads in - * svc_recv) or aging old ones (done by sv_temptimer), or - * configuration changes (excluded by whatever locking the - * caller is using--nfsd_mutex in the case of nfsd). So it's - * safe to traverse those lists and shut everything down: - */ svc_close_net(serv, net); if (serv->sv_shutdown) @@ -1042,6 +1033,7 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) /* * dprintk the given error with the address of the client that caused it. */ +#ifdef RPC_DEBUG static __printf(2, 3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) { @@ -1058,6 +1050,9 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) va_end(args); } +#else +static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} +#endif /* * Common routine for processing the RPC request. diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index b8e47fac731..80a6640f329 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -499,7 +499,8 @@ void svc_wake_up(struct svc_serv *serv) rqstp->rq_xprt = NULL; */ wake_up(&rqstp->rq_wait); - } + } else + pool->sp_task_pending = 1; spin_unlock_bh(&pool->sp_lock); } } @@ -634,7 +635,13 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) * long for cache updates. */ rqstp->rq_chandle.thread_wait = 1*HZ; + pool->sp_task_pending = 0; } else { + if (pool->sp_task_pending) { + pool->sp_task_pending = 0; + spin_unlock_bh(&pool->sp_lock); + return ERR_PTR(-EAGAIN); + } /* No data pending. Go to sleep */ svc_thread_enqueue(pool, rqstp); @@ -856,7 +863,6 @@ static void svc_age_temp_xprts(unsigned long closure) struct svc_serv *serv = (struct svc_serv *)closure; struct svc_xprt *xprt; struct list_head *le, *next; - LIST_HEAD(to_be_aged); dprintk("svc_age_temp_xprts\n"); @@ -877,25 +883,15 @@ static void svc_age_temp_xprts(unsigned long closure) if (atomic_read(&xprt->xpt_ref.refcount) > 1 || test_bit(XPT_BUSY, &xprt->xpt_flags)) continue; - svc_xprt_get(xprt); - list_move(le, &to_be_aged); + list_del_init(le); set_bit(XPT_CLOSE, &xprt->xpt_flags); set_bit(XPT_DETACHED, &xprt->xpt_flags); - } - spin_unlock_bh(&serv->sv_lock); - - while (!list_empty(&to_be_aged)) { - le = to_be_aged.next; - /* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */ - list_del_init(le); - xprt = list_entry(le, struct svc_xprt, xpt_list); - dprintk("queuing xprt %p for closing\n", xprt); /* a thread will dequeue and close it soon */ svc_xprt_enqueue(xprt); - svc_xprt_put(xprt); } + spin_unlock_bh(&serv->sv_lock); mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); } @@ -959,21 +955,24 @@ void svc_close_xprt(struct svc_xprt *xprt) } EXPORT_SYMBOL_GPL(svc_close_xprt); -static void svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) +static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) { struct svc_xprt *xprt; + int ret = 0; spin_lock(&serv->sv_lock); list_for_each_entry(xprt, xprt_list, xpt_list) { if (xprt->xpt_net != net) continue; + ret++; set_bit(XPT_CLOSE, &xprt->xpt_flags); - set_bit(XPT_BUSY, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); } spin_unlock(&serv->sv_lock); + return ret; } -static void svc_clear_pools(struct svc_serv *serv, struct net *net) +static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net) { struct svc_pool *pool; struct svc_xprt *xprt; @@ -988,42 +987,46 @@ static void svc_clear_pools(struct svc_serv *serv, struct net *net) if (xprt->xpt_net != net) continue; list_del_init(&xprt->xpt_ready); + spin_unlock_bh(&pool->sp_lock); + return xprt; } spin_unlock_bh(&pool->sp_lock); } + return NULL; } -static void svc_clear_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net) +static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net) { struct svc_xprt *xprt; - struct svc_xprt *tmp; - LIST_HEAD(victims); - spin_lock(&serv->sv_lock); - list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) { - if (xprt->xpt_net != net) - continue; - list_move(&xprt->xpt_list, &victims); - } - spin_unlock(&serv->sv_lock); - - list_for_each_entry_safe(xprt, tmp, &victims, xpt_list) + while ((xprt = svc_dequeue_net(serv, net))) { + set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_delete_xprt(xprt); + } } +/* + * Server threads may still be running (especially in the case where the + * service is still running in other network namespaces). + * + * So we shut down sockets the same way we would on a running server, by + * setting XPT_CLOSE, enqueuing, and letting a thread pick it up to do + * the close. In the case there are no such other threads, + * threads running, svc_clean_up_xprts() does a simple version of a + * server's main event loop, and in the case where there are other + * threads, we may need to wait a little while and then check again to + * see if they're done. + */ void svc_close_net(struct svc_serv *serv, struct net *net) { - svc_close_list(serv, &serv->sv_tempsocks, net); - svc_close_list(serv, &serv->sv_permsocks, net); + int delay = 0; - svc_clear_pools(serv, net); - /* - * At this point the sp_sockets lists will stay empty, since - * svc_xprt_enqueue will not add new entries without taking the - * sp_lock and checking XPT_BUSY. - */ - svc_clear_list(serv, &serv->sv_tempsocks, net); - svc_clear_list(serv, &serv->sv_permsocks, net); + while (svc_close_list(serv, &serv->sv_permsocks, net) + + svc_close_list(serv, &serv->sv_tempsocks, net)) { + + svc_clean_up_xprts(serv, net); + msleep(delay++); + } } /* diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index a1852e19ed0..c3f9e1ef7f5 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -6,6 +6,7 @@ #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/svcauth.h> #include <linux/sunrpc/gss_api.h> +#include <linux/sunrpc/addr.h> #include <linux/err.h> #include <linux/seq_file.h> #include <linux/hash.h> @@ -17,7 +18,6 @@ #include <linux/user_namespace.h> #define RPCDBG_FACILITY RPCDBG_AUTH -#include <linux/sunrpc/clnt.h> #include "netns.h" @@ -157,11 +157,6 @@ static void ip_map_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int ip_map_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, ip_map_request); -} - static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class, struct in6_addr *addr); static int __ip_map_update(struct cache_detail *cd, struct ip_map *ipm, struct unix_domain *udom, time_t expiry); @@ -475,11 +470,6 @@ static void unix_gid_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static int unix_gid_upcall(struct cache_detail *cd, struct cache_head *h) -{ - return sunrpc_cache_pipe_upcall(cd, h, unix_gid_request); -} - static struct unix_gid *unix_gid_lookup(struct cache_detail *cd, kuid_t uid); static int unix_gid_parse(struct cache_detail *cd, @@ -586,7 +576,7 @@ static struct cache_detail unix_gid_cache_template = { .hash_size = GID_HASHMAX, .name = "auth.unix.gid", .cache_put = unix_gid_put, - .cache_upcall = unix_gid_upcall, + .cache_request = unix_gid_request, .cache_parse = unix_gid_parse, .cache_show = unix_gid_show, .match = unix_gid_match, @@ -885,7 +875,7 @@ static struct cache_detail ip_map_cache_template = { .hash_size = IP_HASHMAX, .name = "auth.unix.ip", .cache_put = ip_map_put, - .cache_upcall = ip_map_upcall, + .cache_request = ip_map_request, .cache_parse = ip_map_parse, .cache_show = ip_map_show, .match = ip_map_match, diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 56055632f15..75edcfad6e2 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -879,6 +879,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, } EXPORT_SYMBOL_GPL(xdr_buf_subsegment); +/** + * xdr_buf_trim - lop at most "len" bytes off the end of "buf" + * @buf: buf to be trimmed + * @len: number of bytes to reduce "buf" by + * + * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note + * that it's possible that we'll trim less than that amount if the xdr_buf is + * too small, or if (for instance) it's all in the head and the parser has + * already read too far into it. + */ +void xdr_buf_trim(struct xdr_buf *buf, unsigned int len) +{ + size_t cur; + unsigned int trim = len; + + if (buf->tail[0].iov_len) { + cur = min_t(size_t, buf->tail[0].iov_len, trim); + buf->tail[0].iov_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->page_len) { + cur = min_t(unsigned int, buf->page_len, trim); + buf->page_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->head[0].iov_len) { + cur = min_t(size_t, buf->head[0].iov_len, trim); + buf->head[0].iov_len -= cur; + trim -= cur; + } +fix_len: + buf->len -= (len - trim); +} +EXPORT_SYMBOL_GPL(xdr_buf_trim); + static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { unsigned int this_len; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index d0074289708..794312f22b9 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -51,6 +51,7 @@ #include <linux/init.h> #include <linux/slab.h> #include <linux/seq_file.h> +#include <linux/sunrpc/addr.h> #include "xprt_rdma.h" diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 37cbda63f45..c1d8476b769 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -33,6 +33,7 @@ #include <linux/udp.h> #include <linux/tcp.h> #include <linux/sunrpc/clnt.h> +#include <linux/sunrpc/addr.h> #include <linux/sunrpc/sched.h> #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/xprtsock.h> @@ -1867,13 +1868,9 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, * @xprt: RPC transport to connect * @transport: socket transport to connect * @create_sock: function to create a socket of the correct type - * - * Invoked by a work queue tasklet. */ -static void xs_local_setup_socket(struct work_struct *work) +static int xs_local_setup_socket(struct sock_xprt *transport) { - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; struct socket *sock; int status = -EIO; @@ -1918,6 +1915,30 @@ out: xprt_clear_connecting(xprt); xprt_wake_pending_tasks(xprt, status); current->flags &= ~PF_FSTRANS; + return status; +} + +static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret; + + if (RPC_IS_ASYNC(task)) { + /* + * We want the AF_LOCAL connect to be resolved in the + * filesystem namespace of the process making the rpc + * call. Thus we connect synchronously. + * + * If we want to support asynchronous AF_LOCAL calls, + * we'll need to figure out how to pass a namespace to + * connect. + */ + rpc_exit(task, -ENOTCONN); + return; + } + ret = xs_local_setup_socket(transport); + if (ret && !RPC_IS_SOFTCONN(task)) + msleep_interruptible(15000); } #ifdef CONFIG_SUNRPC_SWAP @@ -2455,7 +2476,7 @@ static struct rpc_xprt_ops xs_local_ops = { .alloc_slot = xprt_alloc_slot, .rpcbind = xs_local_rpcbind, .set_port = xs_local_set_port, - .connect = xs_connect, + .connect = xs_local_connect, .buf_alloc = rpc_malloc, .buf_free = rpc_free, .send_request = xs_local_send_request, @@ -2628,8 +2649,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) goto out_err; } xprt_set_bound(xprt); - INIT_DELAYED_WORK(&transport->connect_worker, - xs_local_setup_socket); xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); break; default: |